/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec.vector.keyseries; import java.io.IOException; import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; /** * A key series of a multiple columns of keys where the keys get serialized. * (Or, it can be 1 column). */ public class VectorKeySeriesMultiSerialized<T extends SerializeWrite> extends VectorKeySeriesSerializedImpl<T> implements VectorKeySeriesSerialized { private static final Logger LOG = LoggerFactory.getLogger( VectorKeySeriesMultiSerialized.class.getName()); private VectorSerializeRow<T> keySerializeRow; private boolean[] hasAnyNulls; public VectorKeySeriesMultiSerialized(T serializeWrite) { super(serializeWrite); } public void init(TypeInfo[] typeInfos, int[] columnNums) throws HiveException { keySerializeRow = new VectorSerializeRow<T>(serializeWrite); keySerializeRow.init(typeInfos, columnNums); hasAnyNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; } @Override public void processBatch(VectorizedRowBatch batch) throws IOException { currentBatchSize = batch.size; Preconditions.checkState(currentBatchSize > 0); // LOG.info("VectorKeySeriesMultiSerialized processBatch size " + currentBatchSize + " numCols " + batch.numCols + " selectedInUse " + batch.selectedInUse); int prevKeyStart = 0; int prevKeyLength; int currentKeyStart = 0; output.reset(); seriesCount = 0; boolean prevKeyIsNull; duplicateCounts[0] = 1; if (batch.selectedInUse) { int[] selected = batch.selected; int index = selected[0]; keySerializeRow.setOutputAppend(output); keySerializeRow.serializeWrite(batch, index); if (keySerializeRow.getIsAllNulls()) { seriesIsAllNull[0] = prevKeyIsNull = true; prevKeyLength = 0; output.setWritePosition(0); nonNullKeyCount = 0; } else { seriesIsAllNull[0] = prevKeyIsNull = false; serializedKeyLengths[0] = currentKeyStart = prevKeyLength = output.getLength(); hasAnyNulls[0] = keySerializeRow.getHasAnyNulls(); nonNullKeyCount = 1; } int keyLength; for (int logical = 1; logical < currentBatchSize; logical++) { index = selected[logical]; keySerializeRow.setOutputAppend(output); keySerializeRow.serializeWrite(batch, index); if (keySerializeRow.getIsAllNulls()) { if (prevKeyIsNull) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = prevKeyIsNull = true; } output.setWritePosition(currentKeyStart); } else { keyLength = output.getLength() - currentKeyStart; if (!prevKeyIsNull && StringExpr.equal( output.getData(), prevKeyStart, prevKeyLength, output.getData(), currentKeyStart, keyLength)) { duplicateCounts[seriesCount]++; output.setWritePosition(currentKeyStart); } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = prevKeyIsNull = false; prevKeyStart = currentKeyStart; serializedKeyLengths[nonNullKeyCount] = prevKeyLength = keyLength; currentKeyStart += keyLength; hasAnyNulls[nonNullKeyCount] = keySerializeRow.getHasAnyNulls(); nonNullKeyCount++; } } } seriesCount++; Preconditions.checkState(seriesCount <= currentBatchSize); } else { keySerializeRow.setOutputAppend(output); keySerializeRow.serializeWrite(batch, 0); if (keySerializeRow.getIsAllNulls()) { seriesIsAllNull[0] = prevKeyIsNull = true; prevKeyLength = 0; output.setWritePosition(0); nonNullKeyCount = 0; } else { seriesIsAllNull[0] = prevKeyIsNull = false; serializedKeyLengths[0] = currentKeyStart = prevKeyLength = output.getLength(); hasAnyNulls[0] = keySerializeRow.getHasAnyNulls(); nonNullKeyCount = 1; } int keyLength; for (int index = 1; index < currentBatchSize; index++) { keySerializeRow.setOutputAppend(output); keySerializeRow.serializeWrite(batch, index); if (keySerializeRow.getIsAllNulls()) { if (prevKeyIsNull) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = prevKeyIsNull = true; } output.setWritePosition(currentKeyStart); } else { keyLength = output.getLength() - currentKeyStart; if (!prevKeyIsNull && StringExpr.equal( output.getData(), prevKeyStart, prevKeyLength, output.getData(), currentKeyStart, keyLength)) { duplicateCounts[seriesCount]++; output.setWritePosition(currentKeyStart); } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = prevKeyIsNull = false; prevKeyStart = currentKeyStart; serializedKeyLengths[nonNullKeyCount] = prevKeyLength = keyLength; currentKeyStart += keyLength; hasAnyNulls[nonNullKeyCount] = keySerializeRow.getHasAnyNulls(); nonNullKeyCount++; } } } seriesCount++; Preconditions.checkState(seriesCount <= currentBatchSize); } // Finally. computeSerializedHashCodes(); positionToFirst(); Preconditions.checkState(validate()); } @Override public void setNextNonNullKey(int nonNullKeyPosition) { super.setNextNonNullKey(nonNullKeyPosition); currentHasAnyNulls = hasAnyNulls[nonNullKeyPosition]; } }