/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec.vector.keyseries; import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import com.google.common.base.Preconditions; /** * A key series of a single column of byte array keys where the keys get serialized. */ public class VectorKeySeriesBytesSerialized<T extends SerializeWrite> extends VectorKeySeriesSerializedImpl<T> implements VectorKeySeriesSerialized { private final int columnNum; private int outputStartPosition; public VectorKeySeriesBytesSerialized(int columnNum, T serializeWrite) { super(serializeWrite); this.columnNum = columnNum; } @Override public void processBatch(VectorizedRowBatch batch) throws IOException { currentBatchSize = batch.size; Preconditions.checkState(currentBatchSize > 0); BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnNum]; byte[][] vectorBytesArrays = bytesColVector.vector; int[] vectorStarts = bytesColVector.start; int[] vectorLengths = bytesColVector.length; // The serialize routine uses this to build serializedKeyLengths. outputStartPosition = 0; output.reset(); if (bytesColVector.isRepeating){ duplicateCounts[0] = currentBatchSize; if (bytesColVector.noNulls || !bytesColVector.isNull[0]) { seriesIsAllNull[0] = false; serialize(0, vectorBytesArrays[0], vectorStarts[0], vectorLengths[0]); nonNullKeyCount = 1; } else { seriesIsAllNull[0] = true; nonNullKeyCount = 0; } seriesCount = 1; Preconditions.checkState(seriesCount <= currentBatchSize); } else { seriesCount = 0; nonNullKeyCount = 0; if (batch.selectedInUse) { int[] selected = batch.selected; if (bytesColVector.noNulls) { duplicateCounts[0] = 1; int index; index = selected[0]; byte[] prevKeyBytes = vectorBytesArrays[index]; int prevKeyStart = vectorStarts[index]; int prevKeyLength = vectorLengths[index]; serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength); int currentKeyStart; int currentKeyLength; byte[] currentKeyBytes; for (int logical = 1; logical < currentBatchSize; logical++) { index = selected[logical]; currentKeyBytes = vectorBytesArrays[index]; currentKeyStart = vectorStarts[index]; currentKeyLength = vectorLengths[index]; if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength); prevKeyBytes = currentKeyBytes; prevKeyStart = currentKeyStart; prevKeyLength = currentKeyLength; } } Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false); nonNullKeyCount = seriesCount; Preconditions.checkState(seriesCount <= currentBatchSize); } else { boolean[] isNull = bytesColVector.isNull; boolean prevKeyIsNull; byte[] prevKeyBytes = null; int prevKeyStart = 0; int prevKeyLength = 0; duplicateCounts[0] = 1; int index = selected[0]; if (isNull[index]) { seriesIsAllNull[0] = true; prevKeyIsNull = true; } else { seriesIsAllNull[0] = false; prevKeyIsNull = false; prevKeyBytes = vectorBytesArrays[index]; prevKeyStart = vectorStarts[index]; prevKeyLength = vectorLengths[index]; serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength); nonNullKeyCount = 1; } int currentKeyStart; int currentKeyLength; byte[] currentKeyBytes; for (int logical = 1; logical < currentBatchSize; logical++) { index = selected[logical]; if (isNull[index]) { if (prevKeyIsNull) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = true; prevKeyIsNull = true; } } else { currentKeyBytes = vectorBytesArrays[index]; currentKeyStart = vectorStarts[index]; currentKeyLength = vectorLengths[index]; if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = false; serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength); prevKeyIsNull = false; prevKeyBytes = currentKeyBytes; prevKeyStart = currentKeyStart; prevKeyLength = currentKeyLength; } } } seriesCount++; Preconditions.checkState(seriesCount <= currentBatchSize); } } else { // NOT selectedInUse if (bytesColVector.noNulls) { duplicateCounts[0] = 1; byte[] prevKeyBytes = vectorBytesArrays[0]; int prevKeyStart = vectorStarts[0]; int prevKeyLength = vectorLengths[0]; serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength); int currentKeyStart; int currentKeyLength; byte[] currentKeyBytes; for (int index = 1; index < currentBatchSize; index++) { currentKeyBytes = vectorBytesArrays[index]; currentKeyStart = vectorStarts[index]; currentKeyLength = vectorLengths[index]; if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength); prevKeyBytes = currentKeyBytes; prevKeyStart = currentKeyStart; prevKeyLength = currentKeyLength; } } Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false); nonNullKeyCount = seriesCount; Preconditions.checkState(seriesCount <= currentBatchSize); } else { boolean[] isNull = bytesColVector.isNull; boolean prevKeyIsNull; byte[] prevKeyBytes = null; int prevKeyStart = 0; int prevKeyLength = 0; duplicateCounts[0] = 1; if (isNull[0]) { seriesIsAllNull[0] = true; prevKeyIsNull = true; } else { seriesIsAllNull[0] = false; prevKeyIsNull = false; prevKeyBytes = vectorBytesArrays[0]; prevKeyStart = vectorStarts[0]; prevKeyLength = vectorLengths[0]; serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength); nonNullKeyCount = 1; } byte[] currentKeyBytes; int currentKeyStart; int currentKeyLength; for (int index = 1; index < currentBatchSize; index++) { if (isNull[index]) { if (prevKeyIsNull) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = true; prevKeyIsNull = true; } } else { currentKeyBytes = vectorBytesArrays[index]; currentKeyStart = vectorStarts[index]; currentKeyLength = vectorLengths[index]; if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) { duplicateCounts[seriesCount]++; } else { duplicateCounts[++seriesCount] = 1; seriesIsAllNull[seriesCount] = false; serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength); prevKeyIsNull = false; prevKeyBytes = currentKeyBytes; prevKeyStart = currentKeyStart; prevKeyLength = currentKeyLength; } } } seriesCount++; Preconditions.checkState(seriesCount <= currentBatchSize); } } } // Finally. computeSerializedHashCodes(); positionToFirst(); Preconditions.checkState(validate()); } private void serialize(int pos, byte[] bytes, int start, int length) throws IOException { serializeWrite.setAppend(output); serializeWrite.writeString(bytes, start, length); int outputNewPosition = output.getLength(); serializedKeyLengths[pos] = outputNewPosition - outputStartPosition; outputStartPosition = outputNewPosition; } }