/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.impl.logicalLayer; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.PigException; import org.apache.pig.data.DataType; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.logicalLayer.schema.SchemaMergeException; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.plan.ProjectionMap; import org.apache.pig.impl.plan.RequiredFields; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.util.MultiMap; import org.apache.pig.impl.util.Pair; public class LOUnion extends RelationalOperator { private static final long serialVersionUID = 2L; private static Log log = LogFactory.getLog(LOUnion.class); private boolean isOnSchema = false; /** * @param plan * Logical plan this operator is a part of. * @param k * Operator key to assign to this node. */ public LOUnion(LogicalPlan plan, OperatorKey k) { super(plan, k); } public List<LogicalOperator> getInputs() { return mPlan.getPredecessors(this); } @Override public Schema getSchema() throws FrontendException { if (!mIsSchemaComputed) { Collection<LogicalOperator> s = mPlan.getPredecessors(this); log.debug("Number of predecessors in the graph: " + s.size()); try { Iterator<LogicalOperator> iter = s.iterator(); //create merged schema if(isOnSchema){ // this function can be called in parser, before // the foreach statements to project previous columns as //per merged schema are setup. So can't rely just on //regular union's schema merge logic mSchema = createMergedSchemaOnAlias(iter); }else{ //schema for regular union LogicalOperator op = iter.next(); if (null == op) { int errCode = 1006; String msg = "Could not find operator in plan"; throw new FrontendException(msg, errCode, PigException.INPUT, false, null); } if (op.getSchema()!=null) mSchema = new Schema(op.getSchema()); else mSchema = null; while(iter.hasNext()) { op = iter.next(); if(null != mSchema) { mSchema = mSchema.merge(op.getSchema(), false); } else { mSchema = null; break; } } } // set fieldschema parents if(null != mSchema) { for(Schema.FieldSchema fs: mSchema.getFields()) { iter = s.iterator(); while(iter.hasNext()) { LogicalOperator op = iter.next(); Schema opSchema = op.getSchema(); if(null != opSchema) { for(Schema.FieldSchema opFs: opSchema.getFields()) { fs.setParent(opFs.canonicalName, op); } } else { fs.setParent(null, op); } } } } mIsSchemaComputed = true; } catch (FrontendException fe) { mSchema = null; mIsSchemaComputed = false; throw fe; } } return mSchema; } /** * create schema for union-onschema * @param iter * @return * @throws FrontendException */ private Schema createMergedSchemaOnAlias(Iterator<LogicalOperator> iter) throws FrontendException { ArrayList<Schema> schemas = new ArrayList<Schema>(); while(iter.hasNext()){ LogicalOperator lop = iter.next(); Schema sch; sch = lop.getSchema(); if(sch == null) { String msg = "Schema of relation " + lop.getAlias() + " is null." + " UNION ONSCHEMA cannot be used with relations that" + " have null schema."; throw new FrontendException(msg, 1116, PigException.INPUT); } for(Schema.FieldSchema fs : sch.getFields()){ if(fs.alias == null){ String msg = "Schema of relation " + lop.getAlias() + " has a null fieldschema for column(s). Schema :" + sch; throw new FrontendException(msg, 1116, PigException.INPUT); } } schemas.add(sch); } //create the merged schema Schema mergedSchema ; try { mergedSchema = Schema.mergeSchemasByAlias(schemas); }catch(SchemaMergeException e) { String msg = "Error merging schemas for union operator : " + e.getMessage(); throw new FrontendException(msg, 1116, PigException.INPUT, e); } return mergedSchema; } @Override public String name() { return getAliasString() + "Union " + mKey.scope + "-" + mKey.id; } @Override public boolean supportsMultipleInputs() { return true; } @Override public void visit(LOVisitor v) throws VisitorException { v.visit(this); } public byte getType() { return DataType.BAG; } /** * @see org.apache.pig.impl.logicalLayer.LogicalOperator#clone() * Do not use the clone method directly. Operators are cloned when logical plans * are cloned using {@link LogicalPlanCloner} */ @Override protected Object clone() throws CloneNotSupportedException { LOUnion unionClone = (LOUnion)super.clone(); return unionClone; } @Override public ProjectionMap getProjectionMap() { if(mIsProjectionMapComputed) return mProjectionMap; mIsProjectionMapComputed = true; Schema outputSchema; try { outputSchema = getSchema(); } catch (FrontendException fee) { mProjectionMap = null; return mProjectionMap; } if(outputSchema == null) { mProjectionMap = null; return mProjectionMap; } List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>)mPlan.getPredecessors(this); if(predecessors == null) { mProjectionMap = null; return mProjectionMap; } MultiMap<Integer, ProjectionMap.Column> mapFields = new MultiMap<Integer, ProjectionMap.Column>(); for(int inputNum = 0; inputNum < predecessors.size(); ++inputNum) { LogicalOperator predecessor = predecessors.get(inputNum); Schema inputSchema = null; try { inputSchema = predecessor.getSchema(); } catch (FrontendException fee) { mProjectionMap = null; return mProjectionMap; } if(inputSchema == null) { mProjectionMap = null; return mProjectionMap; } else { for(int inputColumn = 0; inputColumn < inputSchema.size(); ++inputColumn) { mapFields.put(inputColumn, new ProjectionMap.Column(new Pair<Integer, Integer>(inputNum, inputColumn))); } } } mProjectionMap = new ProjectionMap(mapFields, null, null); return mProjectionMap; } @Override public List<RequiredFields> getRequiredFields() { List<LogicalOperator> predecessors = mPlan.getPredecessors(this); if(predecessors == null) { return null; } List<RequiredFields> requiredFields = new ArrayList<RequiredFields>(); for(int inputNum = 0; inputNum < predecessors.size(); ++inputNum) { requiredFields.add(new RequiredFields(true)); } return (requiredFields.size() == 0? null: requiredFields); } @Override public List<RequiredFields> getRelevantInputs(int output, int column) throws FrontendException { if (!mIsSchemaComputed) getSchema(); if (output!=0) return null; if (column<0) return null; // if we have schema information, check if output column is valid if (mSchema!=null) { if (column >= mSchema.size()) return null; } List<LogicalOperator> predecessors = mPlan.getPredecessors(this); if (predecessors == null) return null; List<RequiredFields> result = new ArrayList<RequiredFields>(); for (int i=0;i<predecessors.size();i++) { ArrayList<Pair<Integer, Integer>> inputList = new ArrayList<Pair<Integer, Integer>>(); inputList.add(new Pair<Integer, Integer>(i, column)); result.add(new RequiredFields(inputList)); } return result; } @Override public boolean pruneColumns(List<Pair<Integer, Integer>> columns) throws FrontendException { if (!mIsSchemaComputed) getSchema(); if (mSchema == null) { log.warn("Cannot prune columns in union, no schema information found"); return false; } // Find maximum pruning among all inputs boolean[] maximumPruned = new boolean[mSchema.size()]; for (Pair<Integer, Integer>pair : columns) { maximumPruned[pair.second] = true; } int maximumNumPruned = 0; for (int i=0;i<maximumPruned.length;i++) { if (maximumPruned[i]) maximumNumPruned++; } List<LogicalOperator> preds = getInputs(); for (int i=0;i<preds.size();i++) { // Build a list of pruned columns for this predecessor boolean[] actualPruned = new boolean[mSchema.size()]; for (Pair<Integer, Integer>pair : columns) { if (pair.first==i) actualPruned[pair.second] = true; } int actualNumPruned = 0; for (int j=0;j<actualPruned.length;j++) { if (actualPruned[j]) actualNumPruned++; } if (actualNumPruned!=maximumNumPruned) { // We need to prune some columns before LOUnion List<Integer> columnsToProject = new ArrayList<Integer>(); int index=0; for (int j=0;j<actualPruned.length;j++) { if (!maximumPruned[j]) { columnsToProject.add(index); index++; } else { if (!actualPruned[j]) index++; } } ((RelationalOperator)preds.get(i)).insertPlainForEachAfter(columnsToProject); } } super.pruneColumns(columns); return true; } /** * @param isOnSchema the isOnSchema to set */ public void setOnSchema(boolean isOnSchema) { this.isOnSchema = isOnSchema; } /** * @return the isOnSchema */ public boolean isOnSchema() { return isOnSchema; } }