/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.io.IOException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.Operator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.ProjectionMap;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.Pair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Parent for all Logical operators.
*/
abstract public class LogicalOperator extends Operator<LOVisitor> {
private static final long serialVersionUID = 2L;
/**
* Schema that defines the output of this operator.
*/
protected Schema mSchema = null;
/**
* A boolean variable to remember if the schema has been computed
*/
protected boolean mIsSchemaComputed = false;
/**
* Datatype of this output of this operator. Operators start out with data
* type set to UNKNOWN, and have it set for them by the type checker.
*/
protected byte mType = DataType.UNKNOWN;
/**
* Requested level of parallelism for this operation.
*/
protected int mRequestedParallelism;
/**
* Name of the record set that results from this operator.
*/
protected String mAlias;
/**
* Logical plan that this operator is a part of.
*/
protected LogicalPlan mPlan;
/**
* ProjectionMap of this operator.
*/
protected ProjectionMap mProjectionMap;
/**
* A boolean variable to remember if the projection map has been computed
*/
protected boolean mIsProjectionMapComputed = false;
/**
* A HashSet to indicate whether an option (such a Join Type) was pinned
* by the user or can be chosen at runtime by the optimizer.
*/
protected HashSet<Integer> mPinnedOptions = new HashSet<Integer>();
/**
* Name of the customPartitioner if one is used, this is set to null otherwise.
*/
protected String mCustomPartitioner = null;
public String getCustomPartitioner() {
return mCustomPartitioner;
}
public void setCustomPartitioner(String customPartitioner) {
this.mCustomPartitioner = customPartitioner;
}
private static Log log = LogFactory.getLog(LogicalOperator.class);
/**
* Equivalent to LogicalOperator(k, 0).
*
* @param plan
* Logical plan this operator is a part of.
* @param k
* Operator key to assign to this node.
*/
public LogicalOperator(LogicalPlan plan, OperatorKey k) {
this(plan, k, -1);
}
/**
* @param plan
* Logical plan this operator is a part of.
* @param k Operator key to assign to this node.
* @param rp degree of requested parallelism with which to execute this
* node.
*/
public LogicalOperator(LogicalPlan plan, OperatorKey k, int rp) {
super(k);
mPlan = plan;
mRequestedParallelism = rp;
}
/**
* Get the operator key for this operator.
*/
public OperatorKey getOperatorKey() {
return mKey;
}
/**
* Set the output schema for this operator. If a schema already exists, an
* attempt will be made to reconcile it with this new schema.
*
* @param schema
* Schema to set.
* @throws ParseException
* if there is already a schema and the existing schema cannot
* be reconciled with this new schema.
*/
public void setSchema(Schema schema) throws FrontendException {
// In general, operators don't generate their schema until they're
// asked, so ask them to do it.
try {
getSchema();
} catch (FrontendException ioe) {
// It's fine, it just means we don't have a schema yet.
}
if (mSchema == null) {
mSchema = schema;
} else {
mSchema.reconcile(schema);
}
}
/**
* Set the parent of the schema field in the schema hierarchy. Currently only used by
* LOStream and LOLoad.
*
* @param schema the schema instance to set parent for
*/
protected final void setParent(Schema schema) {
if( schema == null )
return;
for( Schema.FieldSchema fs : schema.getFields() ) {
fs.setParent( null, this );
setParent( fs.schema );
}
}
/**
* Directly force the schema without reconcilation
* Please use with great care
* @param schema
*/
public void forceSchema(Schema schema) {
this.mSchema = schema;
}
/**
* Unset the schema as if it had not been calculated. This is used by
* anyone who reorganizes the tree and needs to have schemas recalculated.
*/
public void unsetSchema() throws VisitorException {
mIsSchemaComputed = false;
mSchema = null;
}
/**
* Regenerate the schema by unsetting and getting the schema
*/
public Schema regenerateSchema() throws FrontendException, VisitorException {
unsetSchema();
return getSchema();
}
/**
* Calculate canonical names for all fields in the schema. This should
* only be used for loads or other operators that create all new fields.
*/
public void setCanonicalNames() {
for (Schema.FieldSchema fs : mSchema.getFields()) {
fs.canonicalName = CanonicalNamer.getNewName();
}
}
/**
* Get a copy of the schema for the output of this operator.
*/
public abstract Schema getSchema() throws FrontendException;
/**
* Set the type of this operator. This should only be called by the type
* checking routines.
*
* @param t
* Type to set this operator to.
*/
final public void setType(byte t) {
mType = t;
}
/**
* Get the type of this operator.
*/
public byte getType() {
return mType;
}
public String getAlias() {
return mAlias;
}
public String getAliasString() {
return (mAlias == null) ? "" : (mAlias + ": ");
}
public void setAlias(String newAlias) {
mAlias = newAlias;
}
public int getRequestedParallelism() {
return mRequestedParallelism;
}
public void setRequestedParallelism(int newRequestedParallelism) {
mRequestedParallelism = newRequestedParallelism;
}
public void pinOption(Integer opt) {
mPinnedOptions.add(opt);
}
public boolean isPinnedOption(Integer opt) {
return mPinnedOptions.contains(opt);
}
@Override
public String toString() {
StringBuffer msg = new StringBuffer();
msg.append("(Name: " + name() + " Operator Key: " + mKey + ")");
return msg.toString();
}
/**
* Given a schema, reconcile it with our existing schema.
*
* @param schema
* Schema to reconcile with the existing.
* @throws ParseException
* if the reconciliation is not possible.
*/
protected void reconcileSchema(Schema schema) throws ParseException {
if (mSchema == null) {
mSchema = schema;
return;
}
// TODO
}
/**
* Visit this node with the provided visitor. This should only be called by
* the visitor class itself, never directly.
*
* @param v
* Visitor to visit with.
* @throws VisitException
* if the visitor has a problem.
*/
public abstract void visit(LOVisitor v) throws VisitorException;
public LogicalPlan getPlan() {
return mPlan ;
}
/**
* Change the reference to the plan for this operator. Don't use this
* unless you're sure you know what you're doing.
*/
public void setPlan(LogicalPlan plan) {
mPlan = plan;
}
/***
* IMPORTANT:
* This method is only used for unit testing purpose.
*/
public void setSchemaComputed(boolean computed) {
mIsSchemaComputed = computed ;
}
@Override
public boolean supportsMultipleOutputs() {
return true;
}
/**
* @see org.apache.pig.impl.plan.Operator#clone()
* Do not use the clone method directly. Operators are cloned when logical plans
* are cloned using {@link LogicalPlanCloner}
*/
@Override
protected Object clone() throws CloneNotSupportedException {
LogicalOperator loClone = (LogicalOperator)super.clone();
if(mSchema != null)
loClone.mSchema = this.mSchema.clone();
return loClone;
}
}