/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.union;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.expr.CodeGenerator;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.expr.ValueVectorWriteExpression;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.config.UnionAll;
import org.apache.drill.exec.record.AbstractRecordBatch;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.WritableBatch;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.resolver.TypeCastRules;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.FixedWidthVector;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
import org.apache.drill.exec.vector.ValueVector;

import com.google.common.collect.Lists;

/**
 * Implements the UNION ALL physical operator: concatenates the record batches
 * of its two children, transferring, renaming, or casting columns as needed so
 * that every output batch conforms to a single inferred output schema.
 */
public class UnionAllRecordBatch extends AbstractRecordBatch<UnionAll> {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UnionAllRecordBatch.class);

  private List<MaterializedField> outputFields;
  private UnionAller unionall;
  private UnionAllInput unionAllInput;
  private RecordBatch current;

  private final List<TransferPair> transfers = Lists.newArrayList();
  private List<ValueVector> allocationVectors;
  protected SchemaChangeCallBack callBack = new SchemaChangeCallBack();
  private int recordCount = 0;
  private boolean schemaAvailable = false;
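  /**
   * Creates the operator over exactly two incoming record batches; the inputs
   * are wrapped in a {@link UnionAllInput}, which drives both sides and
   * tracks their schemas.
   */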
  public UnionAllRecordBatch(UnionAll config, List<RecordBatch> children, FragmentContext context) throws OutOfMemoryException {
    super(config, context, false);
    assert (children.size() == 2) : "The number of the operands of Union must be 2";
    unionAllInput = new UnionAllInput(this, children.get(0), children.get(1));
  }

  @Override
  public int getRecordCount() {
    return recordCount;
  }

  @Override
  protected void killIncoming(boolean sendUpstream) {
    unionAllInput.getLeftRecordBatch().kill(sendUpstream);
    unionAllInput.getRightRecordBatch().kill(sendUpstream);
  }

  @Override
  public SelectionVector2 getSelectionVector2() {
    throw new UnsupportedOperationException("UnionAllRecordBatch does not support selection vector");
  }

  @Override
  public SelectionVector4 getSelectionVector4() {
    throw new UnsupportedOperationException("UnionAllRecordBatch does not support selection vector");
  }

  @Override
  public IterOutcome innerNext() {
    try {
      IterOutcome upstream = unionAllInput.nextBatch();
      logger.debug("Upstream of Union-All: {}", upstream);
      switch (upstream) {
        case NONE:
        case OUT_OF_MEMORY:
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
          outputFields = unionAllInput.getOutputFields();
          // fall through
        case OK:
          IterOutcome workOutcome = doWork();
          if (workOutcome != IterOutcome.OK) {
            return workOutcome;
          } else {
            return upstream;
          }
        default:
          throw new IllegalStateException(String.format("Unknown state %s.", upstream));
      }
    } catch (ClassTransformationException | IOException | SchemaChangeException ex) {
      context.fail(ex);
      killIncoming(false);
      return IterOutcome.STOP;
    }
  }

  @Override
  public WritableBatch getWritableBatch() {
    return WritableBatch.get(this);
  }

  private void setValueCount(int count) {
    for (ValueVector v : allocationVectors) {
      ValueVector.Mutator m = v.getMutator();
      m.setValueCount(count);
    }
  }

  private boolean doAlloc() {
    for (ValueVector v : allocationVectors) {
      try {
        AllocationHelper.allocateNew(v, current.getRecordCount());
      } catch (OutOfMemoryException ex) {
        return false;
      }
    }
    return true;
  }
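  /**
   * Builds the output container for the current incoming batch. Each column is
   * handled in one of three ways: transferred directly when name and type
   * already match, copied when only the name differs, or run through a
   * generated cast when the minor type or data mode differs.
   */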
  @SuppressWarnings("resource")
  private IterOutcome doWork() throws ClassTransformationException, IOException, SchemaChangeException {
    if (allocationVectors != null) {
      for (ValueVector v : allocationVectors) {
        v.clear();
      }
    }

    allocationVectors = Lists.newArrayList();
    transfers.clear();

    // If both sides of Union-All are empty
    if (unionAllInput.isBothSideEmpty()) {
      for (int i = 0; i < outputFields.size(); ++i) {
        final String colName = outputFields.get(i).getPath();
        final MajorType majorType = MajorType.newBuilder()
            .setMinorType(MinorType.INT)
            .setMode(DataMode.OPTIONAL)
            .build();

        MaterializedField outputField = MaterializedField.create(colName, majorType);
        ValueVector vv = container.addOrGet(outputField, callBack);
        allocationVectors.add(vv);
      }

      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      return IterOutcome.OK_NEW_SCHEMA;
    }

    final ClassGenerator<UnionAller> cg = CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    cg.getCodeGenerator().plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // cg.getCodeGenerator().saveCodeForDebugging(true);
    int index = 0;
    for (VectorWrapper<?> vw : current) {
      ValueVector vvIn = vw.getValueVector();
      // get the original input column names
      SchemaPath inputPath = SchemaPath.getSimplePath(vvIn.getField().getPath());
      // get the renamed column names
      SchemaPath outputPath = SchemaPath.getSimplePath(outputFields.get(index).getPath());

      final ErrorCollector collector = new ErrorCollectorImpl();
      // According to the input column names, MinorTypes and DataModes, choose whether to
      // transfer directly,
      // rename columns or
      // cast data types (MinorType or DataMode)
      if (hasSameTypeAndMode(outputFields.get(index), vw.getValueVector().getField())) {
        // Transfer column
        MajorType outputFieldType = outputFields.get(index).getType();
        MaterializedField outputField = MaterializedField.create(outputPath.getAsUnescapedPath(), outputFieldType);

        if (outputFields.get(index).getPath().equals(inputPath.getAsUnescapedPath())) {
          ValueVector vvOut = container.addOrGet(outputField);
          TransferPair tp = vvIn.makeTransferPair(vvOut);
          transfers.add(tp);
        // Copy data in order to rename the column
        } else {
          final LogicalExpression expr = ExpressionTreeMaterializer.materialize(inputPath, current, collector, context.getFunctionRegistry());
          if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema. Errors:\n %s.", collector.toErrorString()));
          }

          ValueVector vv = container.addOrGet(outputField, callBack);
          allocationVectors.add(vv);
          TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getPath()));
          ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
          cg.addExpr(write);
        }
      // Cast is necessary
      } else {
        LogicalExpression expr = ExpressionTreeMaterializer.materialize(inputPath, current, collector, context.getFunctionRegistry());
        if (collector.hasErrors()) {
          throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema. Errors:\n %s.", collector.toErrorString()));
        }

        // If the input's DataMode is REQUIRED and the output's DataMode is not,
        // cast to the mode with the least restriction
        if (vvIn.getField().getType().getMode() == DataMode.REQUIRED
            && outputFields.get(index).getType().getMode() != DataMode.REQUIRED) {
          expr = ExpressionTreeMaterializer.convertToNullableType(expr, vvIn.getField().getType().getMinorType(), context.getFunctionRegistry(), collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema. Errors:\n %s.", collector.toErrorString()));
          }
        }

        // If the two inputs' MinorTypes differ,
        // insert a cast before the Union operation
        if (vvIn.getField().getType().getMinorType() != outputFields.get(index).getType().getMinorType()) {
          expr = ExpressionTreeMaterializer.addCastExpression(expr, outputFields.get(index).getType(), context.getFunctionRegistry(), collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema. Errors:\n %s.", collector.toErrorString()));
          }
        }

        final MaterializedField outputField = MaterializedField.create(outputPath.getAsUnescapedPath(), expr.getMajorType());
        ValueVector vector = container.addOrGet(outputField, callBack);
        allocationVectors.add(vector);
        TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getPath()));

        boolean useSetSafe = !(vector instanceof FixedWidthVector);
        ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
        cg.addExpr(write);
      }
      ++index;
    }

    unionall = context.getImplementationClass(cg.getCodeGenerator());
    unionall.setup(context, current, this, transfers);

    if (!schemaAvailable) {
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      schemaAvailable = true;
    }

    if (!doAlloc()) {
      return IterOutcome.OUT_OF_MEMORY;
    }

    recordCount = unionall.unionRecords(0, current.getRecordCount(), 0);
    setValueCount(recordCount);
    return IterOutcome.OK;
  }

  public static boolean hasSameTypeAndMode(MaterializedField leftField, MaterializedField rightField) {
    return (leftField.getType().getMinorType() == rightField.getType().getMinorType())
        && (leftField.getType().getMode() == rightField.getType().getMode());
  }

  // This method is used by the inner class to point the reference `current` to the correct record batch
  private void setCurrentRecordBatch(RecordBatch target) {
    this.current = target;
  }

  // This method is used by the inner class to clear the current record batch
  private void clearCurrentRecordBatch() {
    for (VectorWrapper<?> v : current) {
      v.clear();
    }
  }
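  /**
   * Coordinates the two incoming sides of the Union-All. The left side is
   * always consumed first; leading empty batches are skipped on both sides so
   * that output types are inferred from real data whenever possible.
   */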
  public static class UnionAllInput {
    private UnionAllRecordBatch unionAllRecordBatch;
    private List<MaterializedField> outputFields;
    private OneSideInput leftSide;
    private OneSideInput rightSide;
    private IterOutcome upstream = IterOutcome.NOT_YET;
    private boolean leftIsFinish = false;
    private boolean rightIsFinish = false;

    // These two schemas are obtained from the first record batches of the left and right inputs.
    // They are used to check whether the schema changes between record batches.
    private BatchSchema leftSchema;
    private BatchSchema rightSchema;

    private boolean bothEmpty = false;

    public UnionAllInput(UnionAllRecordBatch unionAllRecordBatch, RecordBatch left, RecordBatch right) {
      this.unionAllRecordBatch = unionAllRecordBatch;
      leftSide = new OneSideInput(left);
      rightSide = new OneSideInput(right);
    }

    private void setBothSideEmpty(boolean bothEmpty) {
      this.bothEmpty = bothEmpty;
    }

    private boolean isBothSideEmpty() {
      return bothEmpty;
    }
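    /**
     * Advances whichever side is currently being consumed. On the first call,
     * both sides are primed and the output schema is inferred; afterwards the
     * left side is drained before the right, and NONE is returned once both
     * sides are exhausted. A schema change on either side after priming is
     * rejected with a {@link SchemaChangeException}.
     */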
    public IterOutcome nextBatch() throws SchemaChangeException {
      if (upstream == RecordBatch.IterOutcome.NOT_YET) {
        IterOutcome iterLeft = leftSide.nextBatch();
        switch (iterLeft) {
          case OK_NEW_SCHEMA:
            /*
             * If the first few record batches are all empty,
             * there is no way to tell whether these empty batches come from empty files.
             * It is incorrect to infer the output types while either side could still turn out to be empty.
             *
             * Thus, a while-loop is necessary to skip those empty batches.
             */
            whileLoop:
            while (leftSide.getRecordBatch().getRecordCount() == 0) {
              iterLeft = leftSide.nextBatch();
              switch (iterLeft) {
                case STOP:
                case OUT_OF_MEMORY:
                  return iterLeft;
                case NONE:
                  // Special Case: The left side was an empty input.
                  leftIsFinish = true;
                  break whileLoop;
                case NOT_YET:
                case OK_NEW_SCHEMA:
                case OK:
                  continue whileLoop;
                default:
                  throw new IllegalStateException(String.format("Unexpected state %s.", iterLeft));
              }
            }
            break;
          case STOP:
          case OUT_OF_MEMORY:
            return iterLeft;
          default:
            throw new IllegalStateException(String.format("Unexpected state %s.", iterLeft));
        }

        IterOutcome iterRight = rightSide.nextBatch();
        switch (iterRight) {
          case OK_NEW_SCHEMA:
            // Unless there is no record batch on the left side of the inputs,
            // always start processing from the left side.
            if (leftIsFinish) {
              unionAllRecordBatch.setCurrentRecordBatch(rightSide.getRecordBatch());
            } else {
              unionAllRecordBatch.setCurrentRecordBatch(leftSide.getRecordBatch());
            }
            // If the record count of the first batch from the right input is zero,
            // there are two possibilities:
            // 1. The right side is an empty input (e.g., file).
            // 2. There will be more records carried by later batches.

            /*
             * If the first few record batches are all empty,
             * there is no way to tell whether these empty batches come from empty files.
             * It is incorrect to infer the output types while either side could still turn out to be empty.
             *
             * Thus, a while-loop is necessary to skip those empty batches.
             */
            whileLoop:
            while (rightSide.getRecordBatch().getRecordCount() == 0) {
              iterRight = rightSide.nextBatch();
              switch (iterRight) {
                case STOP:
                case OUT_OF_MEMORY:
                  return iterRight;
                case NONE:
                  // Special Case: The right side was an empty input.
                  rightIsFinish = true;
                  break whileLoop;
                case NOT_YET:
                case OK_NEW_SCHEMA:
                case OK:
                  continue whileLoop;
                default:
                  throw new IllegalStateException(String.format("Unexpected state %s.", iterRight));
              }
            }

            if (leftIsFinish && rightIsFinish) {
              setBothSideEmpty(true);
            }

            inferOutputFields();
            break;
          case STOP:
          case OUT_OF_MEMORY:
            return iterRight;
          default:
            throw new IllegalStateException(String.format("Unexpected state %s.", iterRight));
        }

        upstream = IterOutcome.OK_NEW_SCHEMA;
        return upstream;
      } else {
        if (isBothSideEmpty()) {
          return IterOutcome.NONE;
        }

        unionAllRecordBatch.clearCurrentRecordBatch();

        if (leftIsFinish && rightIsFinish) {
          upstream = IterOutcome.NONE;
          return upstream;
        } else if (leftIsFinish) {
          IterOutcome iterOutcome = rightSide.nextBatch();
          switch (iterOutcome) {
            case NONE:
              rightIsFinish = true;
              // fall through
            case STOP:
            case OUT_OF_MEMORY:
              upstream = iterOutcome;
              return upstream;
            case OK_NEW_SCHEMA:
              if (!rightSide.getRecordBatch().getSchema().equals(rightSchema)) {
                throw new SchemaChangeException("Schema change detected in the right input of Union-All. This is not currently supported");
              }
              iterOutcome = IterOutcome.OK;
              // fall through
            case OK:
              unionAllRecordBatch.setCurrentRecordBatch(rightSide.getRecordBatch());
              upstream = iterOutcome;
              return upstream;
            default:
              throw new IllegalStateException(String.format("Unknown state %s.", iterOutcome));
          }
        } else if (rightIsFinish) {
          IterOutcome iterOutcome = leftSide.nextBatch();
          switch (iterOutcome) {
            case STOP:
            case OUT_OF_MEMORY:
            case NONE:
              upstream = iterOutcome;
              return upstream;
            case OK:
              unionAllRecordBatch.setCurrentRecordBatch(leftSide.getRecordBatch());
              upstream = iterOutcome;
              return upstream;
            default:
              throw new IllegalStateException(String.format("Unknown state %s.", iterOutcome));
          }
        } else {
          IterOutcome iterOutcome = leftSide.nextBatch();
          switch (iterOutcome) {
            case STOP:
            case OUT_OF_MEMORY:
              upstream = iterOutcome;
              return upstream;
            case OK_NEW_SCHEMA:
              if (!leftSide.getRecordBatch().getSchema().equals(leftSchema)) {
                throw new SchemaChangeException("Schema change detected in the left input of Union-All. This is not currently supported");
              }
              iterOutcome = IterOutcome.OK;
              // fall through
            case OK:
              unionAllRecordBatch.setCurrentRecordBatch(leftSide.getRecordBatch());
              upstream = iterOutcome;
              return upstream;
            case NONE:
              unionAllRecordBatch.setCurrentRecordBatch(rightSide.getRecordBatch());
              upstream = IterOutcome.OK;
              leftIsFinish = true;
              return upstream;
            default:
              throw new IllegalStateException(String.format("Unknown state %s.", iterOutcome));
          }
        }
      }
    }
    /**
     * A summary of the inference in the four different situations:
     * First of all, the field names are always determined by the left side
     * (even when the left side is from an empty file, we have the column names).
     *
     * Cases:
     * 1. Left: non-empty; Right: non-empty
     *      types determined by both sides, with implicit casting involved
     * 2. Left: empty; Right: non-empty
     *      types from the right
     * 3. Left: non-empty; Right: empty
     *      types from the left
     * 4. Left: empty; Right: empty
     *      types are nullable integer
     */
    private void inferOutputFields() {
      if (!leftIsFinish && !rightIsFinish) {
        // Both sides are non-empty
        inferOutputFieldsBothSide();
      } else if (!rightIsFinish) {
        // Left side is empty; right side is non-empty.
        // Use the left side's column names as the output column names,
        // but the right side's column types as the output column types.
        inferOutputFieldsFromSingleSide(
            leftSide.getRecordBatch().getSchema(),
            rightSide.getRecordBatch().getSchema());
      } else {
        // Either the right side is empty or both sides are empty
        // Using the left side's schema is sufficient
        inferOutputFieldsFromSingleSide(
            leftSide.getRecordBatch().getSchema(),
            leftSide.getRecordBatch().getSchema());
      }
    }

    // The output table's column names always follow the left table,
    // while the output type of each column is chosen based on Drill's implicit casting rules
    private void inferOutputFieldsBothSide() {
      outputFields = Lists.newArrayList();
      leftSchema = leftSide.getRecordBatch().getSchema();
      rightSchema = rightSide.getRecordBatch().getSchema();
      Iterator<MaterializedField> leftIter = leftSchema.iterator();
      Iterator<MaterializedField> rightIter = rightSchema.iterator();

      int index = 1;
      while (leftIter.hasNext() && rightIter.hasNext()) {
        MaterializedField leftField = leftIter.next();
        MaterializedField rightField = rightIter.next();

        if (hasSameTypeAndMode(leftField, rightField)) {
          MajorType.Builder builder = MajorType.newBuilder().setMinorType(leftField.getType().getMinorType()).setMode(leftField.getDataMode());
          builder = Types.calculateTypePrecisionAndScale(leftField.getType(), rightField.getType(), builder);
          outputFields.add(MaterializedField.create(leftField.getPath(), builder.build()));
        } else {
          // If the types are not the same,
          // cast the column to the least restrictive data type that fits both sides
          MajorType.Builder builder = MajorType.newBuilder();
          if (leftField.getType().getMinorType() == rightField.getType().getMinorType()) {
            builder.setMinorType(leftField.getType().getMinorType());
            builder = Types.calculateTypePrecisionAndScale(leftField.getType(), rightField.getType(), builder);
          } else {
            List<MinorType> types = Lists.newLinkedList();
            types.add(leftField.getType().getMinorType());
            types.add(rightField.getType().getMinorType());
            MinorType outputMinorType = TypeCastRules.getLeastRestrictiveType(types);
            if (outputMinorType == null) {
              throw new DrillRuntimeException("Type mismatch between " + leftField.getType().getMinorType().toString()
                  + " on the left side and " + rightField.getType().getMinorType().toString()
                  + " on the right side in column " + index + " of UNION ALL");
            }
            builder.setMinorType(outputMinorType);
          }

          // The output data mode should be as flexible as the more flexible one of the two input tables
          List<DataMode> dataModes = Lists.newLinkedList();
          dataModes.add(leftField.getType().getMode());
          dataModes.add(rightField.getType().getMode());
          builder.setMode(TypeCastRules.getLeastRestrictiveDataMode(dataModes));

          outputFields.add(MaterializedField.create(leftField.getPath(), builder.build()));
        }
        ++index;
      }

      assert !leftIter.hasNext() && !rightIter.hasNext() : "Mismatch of column count should have been detected when validating sqlNode at planning";
    }
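    /**
     * Builds the output fields by taking the column names from one schema and
     * the column types from another. Passing the same schema for both (as is
     * done when only the left side has data) simply copies it.
     */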
    private void inferOutputFieldsFromSingleSide(final BatchSchema schemaForNames, final BatchSchema schemaForTypes) {
      outputFields = Lists.newArrayList();

      final List<String> outputColumnNames = Lists.newArrayList();
      final Iterator<MaterializedField> iterForNames = schemaForNames.iterator();
      while (iterForNames.hasNext()) {
        outputColumnNames.add(iterForNames.next().getPath());
      }

      final Iterator<MaterializedField> iterForTypes = schemaForTypes.iterator();
      for (int i = 0; iterForTypes.hasNext(); ++i) {
        MaterializedField field = iterForTypes.next();
        outputFields.add(MaterializedField.create(outputColumnNames.get(i), field.getType()));
      }
    }

    public List<MaterializedField> getOutputFields() {
      if (outputFields == null) {
        throw new NullPointerException("Output fields have not been inferred");
      }

      return outputFields;
    }

    public void killIncoming(boolean sendUpstream) {
      leftSide.getRecordBatch().kill(sendUpstream);
      rightSide.getRecordBatch().kill(sendUpstream);
    }

    public RecordBatch getLeftRecordBatch() {
      return leftSide.getRecordBatch();
    }

    public RecordBatch getRightRecordBatch() {
      return rightSide.getRecordBatch();
    }

    private class OneSideInput {
      private IterOutcome upstream = IterOutcome.NOT_YET;
      private RecordBatch recordBatch;

      public OneSideInput(RecordBatch recordBatch) {
        this.recordBatch = recordBatch;
      }

      public RecordBatch getRecordBatch() {
        return recordBatch;
      }

      public IterOutcome nextBatch() {
        if (upstream == IterOutcome.NONE) {
          throw new IllegalStateException(String.format("Unknown state %s.", upstream));
        }

        if (upstream == IterOutcome.NOT_YET) {
          upstream = unionAllRecordBatch.next(recordBatch);
          return upstream;
        } else {
          do {
            upstream = unionAllRecordBatch.next(recordBatch);
          } while (upstream == IterOutcome.OK && recordBatch.getRecordCount() == 0);

          return upstream;
        }
      }
    }
  }
}