/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.TableSample;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde.serdeConstants;

/**
 * Table Scan Descriptor. Currently, data is only read from a base source as
 * part of the map-reduce framework, so little is stored in the descriptor.
 * More state will be added here as table scan is invoked as part of local
 * work.
 **/
@Explain(displayName = "TableScan",
    explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class TableScanDesc extends AbstractOperatorDesc {
  private static final long serialVersionUID = 1L;

  private String alias;

  private List<VirtualColumn> virtualCols;
  private String statsAggKeyPrefix;   // stats publishing/aggregating key prefix

  /**
   * A list of the partition columns of the table.
   * Set by the semantic analyzer only in case of the analyze command.
   */
  private List<String> partColumns;

  /**
   * Used for split sampling (row count per split). For example,
   *   SELECT count(1) FROM ss_src2 TABLESAMPLE (10 ROWS) s;
   * provides the first 10 rows from each input split.
   */
  private int rowLimit = -1;

  /**
   * A boolean variable set to true by the semantic analyzer only in case of
   * the analyze command.
   */
  private boolean gatherStats;
  private boolean statsReliable;
  private String tmpStatsDir;

  private ExprNodeGenericFuncDesc filterExpr;
  private transient Serializable filterObject;
  private String serializedFilterExpr;
  private String serializedFilterObject;

  // Both neededColumnIDs and neededColumns should never be null.
  // When neededColumnIDs is an empty list, it means no column is needed
  // (e.g. we do not need any column to evaluate SELECT count(*) FROM t).
  private List<Integer> neededColumnIDs;
  private List<String> neededColumns;
  private List<String> neededNestedColumnPaths;

  // All column names referenced, including virtual columns;
  // used in ColumnAccessAnalyzer.
  private transient List<String> referencedColumns;

  public static final String FILTER_EXPR_CONF_STR =
      "hive.io.filter.expr.serialized";

  public static final String FILTER_TEXT_CONF_STR =
      "hive.io.filter.text";

  public static final String FILTER_OBJECT_CONF_STR =
      "hive.io.filter.object";
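
  // Illustrative sketch (not part of this class): these keys are how a
  // serialized filter is typically handed to input formats through the job
  // configuration. Reading them back might look like the following, where
  // `jobConf` is a hypothetical org.apache.hadoop.mapred.JobConf instance:
  //
  //   String filterText = jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR);
  //   String serializedExpr = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  //
  // A reader that supports predicate pushdown can deserialize the expression
  // and skip rows (or whole splits) that cannot match the filter.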

  // Input file name (big) to bucket number.
  private Map<String, Integer> bucketFileNameMapping;

  private boolean isMetadataOnly = false;

  private boolean isAcidTable;

  private AcidUtils.AcidOperationalProperties acidOperationalProperties = null;

  private transient TableSample tableSample;

  private transient Table tableMetadata;

  private BitSet includedBuckets;

  private int numBuckets = -1;

  public TableScanDesc() {
    this(null, null);
  }

  @SuppressWarnings("nls")
  public TableScanDesc(Table tblMetadata) {
    this(null, tblMetadata);
  }

  public TableScanDesc(final String alias, Table tblMetadata) {
    this(alias, null, tblMetadata);
  }

  public TableScanDesc(final String alias, List<VirtualColumn> vcs,
      Table tblMetadata) {
    this.alias = alias;
    this.virtualCols = vcs;
    this.tableMetadata = tblMetadata;
    isAcidTable = AcidUtils.isAcidTable(this.tableMetadata);
    if (isAcidTable) {
      acidOperationalProperties =
          AcidUtils.getAcidOperationalProperties(this.tableMetadata);
    }
  }

  @Override
  public Object clone() {
    List<VirtualColumn> vcs = new ArrayList<VirtualColumn>(getVirtualCols());
    return new TableScanDesc(getAlias(), vcs, this.tableMetadata);
  }

  @Explain(displayName = "alias")
  public String getAlias() {
    return alias;
  }

  @Explain(explainLevels = { Level.USER })
  public String getTbl() {
    StringBuilder sb = new StringBuilder();
    sb.append(this.tableMetadata.getCompleteName());
    sb.append("," + alias);
    if (isAcidTable()) {
      sb.append(", ACID table");
    }
    sb.append(",Tbl:");
    sb.append(this.statistics.getBasicStatsState());
    sb.append(",Col:");
    sb.append(this.statistics.getColumnStatsState());
    return sb.toString();
  }

  public boolean isAcidTable() {
    return isAcidTable;
  }

  public AcidUtils.AcidOperationalProperties getAcidOperationalProperties() {
    return acidOperationalProperties;
  }

  @Explain(displayName = "Output", explainLevels = { Level.USER })
  public List<String> getOutputColumnNames() {
    return this.neededColumns;
  }

  @Explain(displayName = "filterExpr")
  public String getFilterExprString() {
    return PlanUtils.getExprListString(Arrays.asList(filterExpr));
  }

  public ExprNodeGenericFuncDesc getFilterExpr() {
    return filterExpr;
  }

  public void setFilterExpr(ExprNodeGenericFuncDesc filterExpr) {
    this.filterExpr = filterExpr;
  }

  public Serializable getFilterObject() {
    return filterObject;
  }

  public void setFilterObject(Serializable filterObject) {
    this.filterObject = filterObject;
  }

  public void setNeededColumnIDs(List<Integer> neededColumnIDs) {
    this.neededColumnIDs = neededColumnIDs;
  }

  public List<Integer> getNeededColumnIDs() {
    return neededColumnIDs;
  }

  public List<String> getNeededNestedColumnPaths() {
    return neededNestedColumnPaths;
  }

  public void setNeededNestedColumnPaths(List<String> neededNestedColumnPaths) {
    this.neededNestedColumnPaths = neededNestedColumnPaths;
  }

  public void setNeededColumns(List<String> neededColumns) {
    this.neededColumns = neededColumns;
  }

  public List<String> getNeededColumns() {
    return neededColumns;
  }

  @Explain(displayName = "Pruned Column Paths")
  public List<String> getPrunedColumnPaths() {
    List<String> result = new ArrayList<>();
    for (String p : neededNestedColumnPaths) {
      // Only paths that descend into a nested field qualify.
      if (p.indexOf('.') >= 0) {
        result.add(p);
      }
    }
    return result;
  }
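
  // A small worked example of the method above, with hypothetical inputs:
  // given neededNestedColumnPaths = ["s.a", "s.b.c", "id"],
  // getPrunedColumnPaths() returns ["s.a", "s.b.c"] -- only paths that
  // contain a '.' (i.e. reach into a nested field) are reported as pruned.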

  public void setReferencedColumns(List<String> referencedColumns) {
    this.referencedColumns = referencedColumns;
  }

  public List<String> getReferencedColumns() {
    return referencedColumns;
  }

  public void setAlias(String alias) {
    this.alias = alias;
  }

  public void setPartColumns(List<String> partColumns) {
    this.partColumns = partColumns;
  }

  public List<String> getPartColumns() {
    return partColumns;
  }

  public void setGatherStats(boolean gatherStats) {
    this.gatherStats = gatherStats;
  }

  @Explain(displayName = "GatherStats", explainLevels = { Level.EXTENDED })
  public boolean isGatherStats() {
    return gatherStats;
  }

  public String getTmpStatsDir() {
    return tmpStatsDir;
  }

  public void setTmpStatsDir(String tmpStatsDir) {
    this.tmpStatsDir = tmpStatsDir;
  }

  public List<VirtualColumn> getVirtualCols() {
    return virtualCols;
  }

  public void setVirtualCols(List<VirtualColumn> virtualCols) {
    this.virtualCols = virtualCols;
  }

  public void addVirtualCols(List<VirtualColumn> virtualCols) {
    this.virtualCols.addAll(virtualCols);
  }

  public boolean hasVirtualCols() {
    return virtualCols != null && !virtualCols.isEmpty();
  }

  public void setStatsAggPrefix(String k) {
    statsAggKeyPrefix = k;
  }

  @Explain(displayName = "Statistics Aggregation Key Prefix",
      explainLevels = { Level.EXTENDED })
  public String getStatsAggPrefix() {
    return statsAggKeyPrefix;
  }

  public boolean isStatsReliable() {
    return statsReliable;
  }

  public void setStatsReliable(boolean statsReliable) {
    this.statsReliable = statsReliable;
  }

  public void setRowLimit(int rowLimit) {
    this.rowLimit = rowLimit;
  }

  public int getRowLimit() {
    return rowLimit;
  }

  @Explain(displayName = "Row Limit Per Split")
  public Integer getRowLimitExplain() {
    return rowLimit >= 0 ? rowLimit : null;
  }

  public Map<String, Integer> getBucketFileNameMapping() {
    return bucketFileNameMapping;
  }

  public void setBucketFileNameMapping(Map<String, Integer> bucketFileNameMapping) {
    this.bucketFileNameMapping = bucketFileNameMapping;
  }

  public void setIsMetadataOnly(boolean metadataOnly) {
    isMetadataOnly = metadataOnly;
  }

  public boolean getIsMetadataOnly() {
    return isMetadataOnly;
  }

  public Table getTableMetadata() {
    return tableMetadata;
  }

  public void setTableMetadata(Table tableMetadata) {
    this.tableMetadata = tableMetadata;
  }

  public TableSample getTableSample() {
    return tableSample;
  }

  public void setTableSample(TableSample tableSample) {
    this.tableSample = tableSample;
  }

  public String getSerializedFilterExpr() {
    return serializedFilterExpr;
  }

  public void setSerializedFilterExpr(String serializedFilterExpr) {
    this.serializedFilterExpr = serializedFilterExpr;
  }

  public String getSerializedFilterObject() {
    return serializedFilterObject;
  }

  public void setSerializedFilterObject(String serializedFilterObject) {
    this.serializedFilterObject = serializedFilterObject;
  }

  public void setIncludedBuckets(BitSet bitset) {
    this.includedBuckets = bitset;
  }

  public BitSet getIncludedBuckets() {
    return this.includedBuckets;
  }

  @Explain(displayName = "buckets included", explainLevels = { Level.EXTENDED })
  public String getIncludedBucketExplain() {
    if (this.includedBuckets == null) {
      return null;
    }

    StringBuilder sb = new StringBuilder();
    sb.append("[");
    for (int i = 0; i < this.includedBuckets.size(); i++) {
      if (this.includedBuckets.get(i)) {
        sb.append(i);
        sb.append(',');
      }
    }
    sb.append(String.format("] of %d", numBuckets));
    return sb.toString();
  }

  public int getNumBuckets() {
    return numBuckets;
  }

  public void setNumBuckets(int numBuckets) {
    this.numBuckets = numBuckets;
  }

  public boolean isNeedSkipHeaderFooters() {
    boolean rtn = false;
    if (tableMetadata != null && tableMetadata.getTTable() != null) {
      Map<String, String> params = tableMetadata.getTTable().getParameters();
      if (params != null) {
        String skipHVal = params.get(serdeConstants.HEADER_COUNT);
        int hcount = skipHVal == null ? 0 : Integer.parseInt(skipHVal);
        String skipFVal = params.get(serdeConstants.FOOTER_COUNT);
        int fcount = skipFVal == null ? 0 : Integer.parseInt(skipFVal);
        rtn = (hcount != 0 || fcount != 0);
      }
    }
    return rtn;
  }
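
  // For reference, the parameters consulted above are the table properties
  // set via TBLPROPERTIES, e.g. (hypothetical table):
  //
  //   CREATE TABLE t (...) TBLPROPERTIES (
  //     "skip.header.line.count" = "1",
  //     "skip.footer.line.count" = "2"
  //   );
  //
  // serdeConstants.HEADER_COUNT / FOOTER_COUNT name those keys; any non-zero
  // count makes the method above return true.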

  @Override
  @Explain(displayName = "properties",
      explainLevels = { Level.DEFAULT, Level.USER, Level.EXTENDED })
  public Map<String, String> getOpProps() {
    return opProps;
  }

  public class TableScanOperatorExplainVectorization extends OperatorExplainVectorization {

    private final TableScanDesc tableScanDesc;
    private final VectorTableScanDesc vectorTableScanDesc;

    public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc,
        VectorDesc vectorDesc) {
      // Native vectorization supported.
      super(vectorDesc, true);
      this.tableScanDesc = tableScanDesc;
      vectorTableScanDesc = (VectorTableScanDesc) vectorDesc;
    }

    @Explain(vectorization = Vectorization.EXPRESSION,
        displayName = "projectedOutputColumns",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getProjectedOutputColumns() {
      return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns());
    }
  }

  @Explain(vectorization = Vectorization.OPERATOR,
      displayName = "TableScan Vectorization",
      explainLevels = { Level.DEFAULT, Level.EXTENDED })
  public TableScanOperatorExplainVectorization getTableScanVectorization() {
    if (vectorDesc == null) {
      return null;
    }
    return new TableScanOperatorExplainVectorization(this, vectorDesc);
  }
}
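
// Illustrative usage sketch (hypothetical, not part of the planner): the
// semantic analyzer populates a descriptor roughly like this and attaches it
// to a TableScanOperator, where downstream optimizers read and refine it:
//
//   Table tbl = ...;  // resolved table metadata
//   TableScanDesc tsDesc = new TableScanDesc("t", tbl);
//   tsDesc.setNeededColumnIDs(Arrays.asList(0, 2));
//   tsDesc.setNeededColumns(Arrays.asList("id", "name"));
//   tsDesc.setRowLimit(10);  // e.g. TABLESAMPLE (10 ROWS)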