/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec.vector; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; /** * A VectorizedRowBatch is a set of rows, organized with each column * as a vector. It is the unit of query execution, organized to minimize * the cost per row and achieve high cycles-per-instruction. * The major fields are public by design to allow fast and convenient * access by the vectorized query execution code. */ public class VectorizedRowBatch implements Writable { public int numCols; // number of columns public ColumnVector[] cols; // a vector for each column public int size; // number of rows that qualify (i.e. haven't been filtered out) public int[] selected; // array of positions of selected values public int[] projectedColumns; public int projectionSize; private int dataColumnCount; private int partitionColumnCount; /* * If no filtering has been applied yet, selectedInUse is false, * meaning that all rows qualify. If it is true, then the selected[] array * records the offsets of qualifying rows. */ public boolean selectedInUse; // If this is true, then there is no data in the batch -- we have hit the end of input. public boolean endOfFile; /* * This number is carefully chosen to minimize overhead and typically allows * one VectorizedRowBatch to fit in cache. */ public static final int DEFAULT_SIZE = 1024; /* * This number is a safety limit for 32MB of writables. */ public static final int DEFAULT_BYTES = 32 * 1024 * 1024; /** * Return a batch with the specified number of columns. * This is the standard constructor -- all batches should be the same size * * @param numCols the number of columns to include in the batch */ public VectorizedRowBatch(int numCols) { this(numCols, DEFAULT_SIZE); } /** * Return a batch with the specified number of columns and rows. * Only call this constructor directly for testing purposes. * Batch size should normally always be defaultSize. * * @param numCols the number of columns to include in the batch * @param size the number of rows to include in the batch */ public VectorizedRowBatch(int numCols, int size) { this.numCols = numCols; this.size = size; selected = new int[size]; selectedInUse = false; this.cols = new ColumnVector[numCols]; projectedColumns = new int[numCols]; // Initially all columns are projected and in the same order projectionSize = numCols; for (int i = 0; i < numCols; i++) { projectedColumns[i] = i; } dataColumnCount = -1; partitionColumnCount = -1; } public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) { this.dataColumnCount = dataColumnCount; this.partitionColumnCount = partitionColumnCount; } public int getDataColumnCount() { return dataColumnCount; } public int getPartitionColumnCount() { return partitionColumnCount; } /** * Returns the maximum size of the batch (number of rows it can hold) */ public int getMaxSize() { return selected.length; } /** * Return count of qualifying rows. * * @return number of rows that have not been filtered out */ public long count() { return size; } private static String toUTF8(Object o) { if(o == null || o instanceof NullWritable) { return "\\N"; /* as found in LazySimpleSerDe's nullSequence */ } return o.toString(); } @Override public String toString() { if (size == 0) { return ""; } StringBuilder b = new StringBuilder(); b.append("Column vector types: "); for (int k = 0; k < projectionSize; k++) { int projIndex = projectedColumns[k]; ColumnVector cv = cols[projIndex]; if (k > 0) { b.append(", "); } b.append(projIndex); b.append(":"); String colVectorType = null; if (cv instanceof LongColumnVector) { colVectorType = "LONG"; } else if (cv instanceof DoubleColumnVector) { colVectorType = "DOUBLE"; } else if (cv instanceof BytesColumnVector) { colVectorType = "BYTES"; } else if (cv instanceof DecimalColumnVector) { colVectorType = "DECIMAL"; } else if (cv instanceof TimestampColumnVector) { colVectorType = "TIMESTAMP"; } else if (cv instanceof IntervalDayTimeColumnVector) { colVectorType = "INTERVAL_DAY_TIME"; } else if (cv instanceof ListColumnVector) { colVectorType = "LIST"; } else if (cv instanceof MapColumnVector) { colVectorType = "MAP"; } else if (cv instanceof StructColumnVector) { colVectorType = "STRUCT"; } else if (cv instanceof UnionColumnVector) { colVectorType = "UNION"; } else { colVectorType = "Unknown"; } b.append(colVectorType); } b.append('\n'); if (this.selectedInUse) { for (int j = 0; j < size; j++) { int i = selected[j]; b.append('['); for (int k = 0; k < projectionSize; k++) { int projIndex = projectedColumns[k]; ColumnVector cv = cols[projIndex]; if (k > 0) { b.append(", "); } cv.stringifyValue(b, i); } b.append(']'); if (j < size - 1) { b.append('\n'); } } } else { for (int i = 0; i < size; i++) { b.append('['); for (int k = 0; k < projectionSize; k++) { int projIndex = projectedColumns[k]; ColumnVector cv = cols[projIndex]; if (k > 0) { b.append(", "); } if (cv != null) { try { cv.stringifyValue(b, i); } catch (Exception ex) { b.append("<invalid>"); } } } b.append(']'); if (i < size - 1) { b.append('\n'); } } } return b.toString(); } @Override public void readFields(DataInput arg0) throws IOException { throw new UnsupportedOperationException("Do you really need me?"); } @Override public void write(DataOutput arg0) throws IOException { throw new UnsupportedOperationException("Don't call me"); } /** * Resets the row batch to default state * - sets selectedInUse to false * - sets size to 0 * - sets endOfFile to false * - resets each column * - inits each column */ public void reset() { selectedInUse = false; size = 0; endOfFile = false; for (ColumnVector vc : cols) { if (vc != null) { vc.reset(); vc.init(); } } } /** * Set the maximum number of rows in the batch. * Data is not preserved. */ public void ensureSize(int rows) { for(int i=0; i < cols.length; ++i) { cols[i].ensureSize(rows, false); } } }