/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.impl.logicalLayer; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.FuncSpec; import org.apache.pig.PigException; import org.apache.pig.SortColInfo; import org.apache.pig.SortInfo; import org.apache.pig.data.DataType; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.plan.Operator; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.plan.PlanException; import org.apache.pig.impl.plan.ProjectionMap; import org.apache.pig.impl.plan.RequiredFields; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.util.Pair; public class LOSort extends RelationalOperator { private static final long serialVersionUID = 2L; private List<Boolean> mAscCols; private FuncSpec mSortFunc; private boolean mIsStar = false; private long limit; private List<LogicalPlan> mSortColPlans; private static Log log = LogFactory.getLog(LOSort.class); /** * @param plan * LogicalPlan this operator is a part of. * @param key * OperatorKey for this operator * @param sortColPlans * Array of column numbers that will be used for sorting data. * @param ascCols * Array of booleans. Should be same size as sortCols. True * indicates sort ascending (default), false sort descending. If * this array is null, then all columns will be sorted ascending. * @param sortFunc * the user defined sorting function */ public LOSort( LogicalPlan plan, OperatorKey key, List<LogicalPlan> sortColPlans, List<Boolean> ascCols, FuncSpec sortFunc) { super(plan, key); mSortColPlans = sortColPlans; mAscCols = ascCols; mSortFunc = sortFunc; limit = -1; } public LogicalOperator getInput() { return mPlan.getPredecessors(this).get(0); } public List<LogicalPlan> getSortColPlans() { return mSortColPlans; } public void setSortColPlans(List<LogicalPlan> sortPlans) { mSortColPlans = sortPlans; } public List<Boolean> getAscendingCols() { return mAscCols; } public void setAscendingCols(List<Boolean> ascCols) { mAscCols = ascCols; } public FuncSpec getUserFunc() { return mSortFunc; } public void setUserFunc(FuncSpec func) { mSortFunc = func; } public boolean isStar() { return mIsStar; } public void setStar(boolean b) { mIsStar = b; } public void setLimit(long l) { limit = l; } public long getLimit() { return limit; } public boolean isLimited() { return (limit!=-1); } @Override public String name() { return getAliasString() + "SORT " + mKey.scope + "-" + mKey.id; } @Override public Schema getSchema() throws FrontendException { if (!mIsSchemaComputed) { // get our parent's schema Collection<LogicalOperator> s = mPlan.getPredecessors(this); ArrayList<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>(); try { LogicalOperator op = s.iterator().next(); if (null == op) { int errCode = 1006; String msg = "Could not find operator in plan"; throw new FrontendException(msg, errCode, PigException.INPUT, false, null); } if(op instanceof ExpressionOperator) { Schema.FieldSchema fs = Schema.FieldSchema.copyAndLink(((ExpressionOperator)op).getFieldSchema(), op); if(DataType.isSchemaType(fs.type)) { mSchema = fs.schema; } else { fss.add(fs); mSchema = new Schema(fss); } } else { if (getInput().getSchema()!=null) { mSchema = Schema.copyAndLink( op.getSchema(), op ); } else mSchema = null; } mIsSchemaComputed = true; } catch (FrontendException ioe) { mSchema = null; mIsSchemaComputed = false; throw ioe; } } return mSchema; } @Override public boolean supportsMultipleInputs() { return false; } public void visit(LOVisitor v) throws VisitorException { v.visit(this); } public byte getType() { return DataType.BAG ; } /** * @see org.apache.pig.impl.logicalLayer.LogicalOperator#clone() * Do not use the clone method directly. Operators are cloned when logical plans * are cloned using {@link LogicalPlanCloner} */ @Override protected Object clone() throws CloneNotSupportedException { LOSort clone = (LOSort) super.clone(); // deep copy sort related attributes if(mAscCols != null) { clone.mAscCols = new ArrayList<Boolean>(); for (Iterator<Boolean> it = mAscCols.iterator(); it.hasNext();) { clone.mAscCols.add(Boolean.valueOf(it.next())); } } if(mSortFunc != null) clone.mSortFunc = mSortFunc.clone(); if(mSortColPlans != null) { clone.mSortColPlans = new ArrayList<LogicalPlan>(); for (Iterator<LogicalPlan> it = mSortColPlans.iterator(); it.hasNext();) { LogicalPlanCloneHelper lpCloneHelper = new LogicalPlanCloneHelper(it.next()); clone.mSortColPlans.add(lpCloneHelper.getClonedPlan()); } } return clone; } @Override public ProjectionMap getProjectionMap() { if(mIsProjectionMapComputed) return mProjectionMap; mIsProjectionMapComputed = true; Schema outputSchema; try { outputSchema = getSchema(); } catch (FrontendException fee) { mProjectionMap = null; return mProjectionMap; } Schema inputSchema = null; List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>)mPlan.getPredecessors(this); if(predecessors != null) { try { inputSchema = predecessors.get(0).getSchema(); } catch (FrontendException fee) { mProjectionMap = null; return mProjectionMap; } } else { mProjectionMap = null; return mProjectionMap; } if(Schema.equals(inputSchema, outputSchema, false, true)) { //there is a one is to one mapping between input and output schemas mProjectionMap = new ProjectionMap(false); return mProjectionMap; } else { //problem - input and output schemas for a sort have to match! mProjectionMap = null; return mProjectionMap; } } @Override public List<RequiredFields> getRequiredFields() { List<RequiredFields> requiredFields = new ArrayList<RequiredFields>(); Set<Pair<Integer, Integer>> fields = new HashSet<Pair<Integer, Integer>>(); Set<LOProject> projectSet = new HashSet<LOProject>(); boolean orderByStar = false; for (LogicalPlan plan : getSortColPlans()) { TopLevelProjectFinder projectFinder = new TopLevelProjectFinder( plan); try { projectFinder.visit(); } catch (VisitorException ve) { requiredFields.clear(); requiredFields.add(null); return requiredFields; } projectSet.addAll(projectFinder.getProjectSet()); if(projectFinder.getProjectStarSet() != null) { orderByStar = true; } } if(orderByStar) { requiredFields.add(new RequiredFields(true)); return requiredFields; } else { for (LOProject project : projectSet) { for (int inputColumn : project.getProjection()) { fields.add(new Pair<Integer, Integer>(0, inputColumn)); } } if(fields.size() == 0) { requiredFields.add(new RequiredFields(false, true)); } else { requiredFields.add(new RequiredFields(new ArrayList<Pair<Integer, Integer>>(fields))); } return (requiredFields.size() == 0? null: requiredFields); } } /* (non-Javadoc) * @see org.apache.pig.impl.plan.Operator#rewire(org.apache.pig.impl.plan.Operator, org.apache.pig.impl.plan.Operator) */ @Override public void rewire(Operator<LOVisitor> oldPred, int oldPredIndex, Operator<LOVisitor> newPred, boolean useOldPred) throws PlanException { super.rewire(oldPred, oldPredIndex, newPred, useOldPred); LogicalOperator previous = (LogicalOperator) oldPred; LogicalOperator current = (LogicalOperator) newPred; for(LogicalPlan plan: mSortColPlans) { try { ProjectFixerUpper projectFixer = new ProjectFixerUpper( plan, previous, oldPredIndex, current, useOldPred, this); projectFixer.visit(); } catch (VisitorException ve) { int errCode = 2144; String msg = "Problem while fixing project inputs during rewiring."; throw new PlanException(msg, errCode, PigException.BUG, ve); } } } public SortInfo getSortInfo() throws FrontendException { Schema schema = this.getSchema(); List<SortColInfo> sortColInfoList = new ArrayList<SortColInfo>(); for (int i = 0; i < mSortColPlans.size(); i++) { LogicalPlan lp = mSortColPlans.get(i); Iterator<LogicalOperator> opsIterator = lp.iterator(); List<LogicalOperator> opsList = new ArrayList<LogicalOperator>(); while(opsIterator.hasNext()) { opsList.add(opsIterator.next()); } if(opsList.size() != 1 || !(opsList.get(0) instanceof LOProject)) { int errCode = 2066; String msg = "Unsupported operator in inner plan: " + opsList.get(0); throw new PlanException(msg, errCode, PigException.BUG); } LOProject project = (LOProject) opsList.get(0); int sortColIndex = project.getCol(); String sortColName = (schema == null) ? null : schema.getField(sortColIndex).alias; sortColInfoList.add(new SortColInfo(sortColName, sortColIndex, mAscCols.get(i)? SortColInfo.Order.ASCENDING : SortColInfo.Order.DESCENDING)); } return new SortInfo(sortColInfoList); } @Override public List<RequiredFields> getRelevantInputs(int output, int column) throws FrontendException { if (!mIsSchemaComputed) getSchema(); if (output!=0) return null; if (column<0) return null; // if we have schema information, check if output column is valid if (mSchema!=null) { if (column >= mSchema.size()) return null; } ArrayList<Pair<Integer, Integer>> inputList = new ArrayList<Pair<Integer, Integer>>(); inputList.add(new Pair<Integer, Integer>(0, column)); List<RequiredFields> result = new ArrayList<RequiredFields>(); result.add(new RequiredFields(inputList)); return result; } @Override public boolean pruneColumns(List<Pair<Integer, Integer>> columns) throws FrontendException { if (!mIsSchemaComputed) getSchema(); if (mSchema == null) { log .warn("Cannot prune columns in sort, no schema information found"); return false; } List<LogicalOperator> predecessors = mPlan.getPredecessors(this); if (predecessors == null) return false; for (int i=columns.size()-1;i>=0;i--) { Pair<Integer, Integer> column = columns.get(i); if (column.first != 0) { int errCode = 2191; throw new FrontendException( "Sort only take 1 input, cannot prune input with index " + column.first, errCode, PigException.BUG); } if (column.second < 0) { int errCode = 2192; throw new FrontendException("Column to prune does not exist", errCode, PigException.BUG); } for (LogicalPlan plan : mSortColPlans) { pruneColumnInPlan(plan, column.second); } } super.pruneColumns(columns);; return true; } }