package org.apache.pig.backend.stratosphere.executionengine.pactLayer; import java.io.Serializable; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.plans.PactPlan; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.plans.PactPlanVisitor; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.impl.plan.Operator; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.pen.Illustrable; import org.apache.pig.pen.Illustrator; import org.apache.pig.pen.util.LineageTracer; public abstract class PactOperator extends Operator<PactPlanVisitor> implements Illustrable, Cloneable, Serializable { // The degree of parallelism requested protected int requestedParallelism; // The inputs that this operator will read data from protected List<PactOperator> inputs; // The outputs that this operator will write data to // Will be used to create Targeted tuples protected List<PactOperator> outputs; // The data type for the results of this operator protected byte resultType = DataType.TUPLE; // The physical plan this operator is part of protected PactPlan parentPlan; // Specifies if the input has been directly attached protected boolean inputAttached = false; // If inputAttached is true, input is set to the input tuple protected Tuple input = null; // The result of performing the operation along with the output protected Result res = null; // alias associated with this PactOperator protected String alias = null; // the key columns (if any) protected int keyPosition1; protected int keyPosition2; public Set<String> UDFs; public Set<PactOperator> scalars; // Will be used by operators to report status or transmit heartbeat // Should be set by the backends to appropriate implementations that // wrap their own version of a reporter. public static PigProgressable reporter; // Will be used by operators to aggregate warning messages // Should be set by the backends to appropriate implementations that // wrap their own version of a logger. protected static PigLogger pigLogger; // Dummy types used to access the getNext of appropriate // type. These will be null static final protected DataByteArray dummyDBA = null; static final protected String dummyString = null; static final protected Double dummyDouble = null; static final protected Float dummyFloat = null; static final protected Integer dummyInt = null; static final protected Long dummyLong = null; static final protected Boolean dummyBool = null; static final protected Tuple dummyTuple = null; static final protected DataBag dummyBag = null; static final protected Map dummyMap = null; // TODO: This is not needed. But a lot of tests check serialized physical plans // that are sensitive to the serialized image of the contained physical operators. // So for now, just keep it. Later it'll be cleansed along with those test golden // files protected LineageTracer lineageTracer; protected transient Illustrator illustrator = null; private boolean accum; private transient boolean accumStart; public PactOperator(OperatorKey k) { super(k); UDFs = new HashSet<String>(); scalars = new HashSet<PactOperator>(); } public PactOperator(OperatorKey k, int rp, List<PactOperator> inp) { super(k); requestedParallelism = rp; inputs = inp; res = new Result(); UDFs = new HashSet<String>(); scalars = new HashSet<PactOperator>(); } public PactOperator(OperatorKey k, int rp) { super(k); requestedParallelism = rp; UDFs = new HashSet<String>(); scalars = new HashSet<PactOperator>(); } protected static final long serialVersionUID = 1L; public void visit(PactPlanVisitor v) throws VisitorException { // TODO Auto-generated method stub } public Result getNext(Tuple t) throws ExecException { // TODO Auto-generated method stub return null; } public int getRequestedParallelism() { return requestedParallelism; } public void setRequestedParallelism(int requestedParallelism) { this.requestedParallelism = requestedParallelism; } public byte getResultType() { return resultType; } public String getAlias() { return alias; } protected String getAliasString() { return (alias == null) ? "" : (alias + ": "); } public void setAlias(String alias) { this.alias = alias; } public void setAccumulative() { accum = true; } public boolean isAccumulative() { return accum; } public void setAccumStart() { if (!accum) { throw new IllegalStateException("Accumulative is not turned on."); } accumStart = true; } public boolean isAccumStarted() { return accumStart; } public void setAccumEnd() { if (!accum){ throw new IllegalStateException("Accumulative is not turned on."); } accumStart = false; } public void setResultType(byte resultType) { this.resultType = resultType; } public List<PactOperator> getInputs() { return inputs; } public void setInputs(List<PactOperator> inputs) { this.inputs = inputs; } public boolean isInputAttached() { return inputAttached; } /** * A generic method for parsing input that either returns the attached input * if it exists or fetches it from its predecessor. If special processing is * required, this method should be overridden. * * @return The Result object that results from processing the input * @throws ExecException */ public Result processInput() throws ExecException { Result res = new Result(); if (input == null && (inputs == null || inputs.size()==0)) { // log.warn("No inputs found. Signaling End of Processing."); res.returnStatus = SOStatus.STATUS_EOP; return res; } if (!isInputAttached()) { res.returnStatus = SOStatus.STATUS_EOP; return res; //AVK //return inputs.get(0).getNext(dummyTuple); //DVK } else { res.result = input; res.returnStatus = (res.result == null ? SOStatus.STATUS_NULL: SOStatus.STATUS_OK); detachInput(); return res; } } /** * Detaches any tuples that are attached * */ public void detachInput() { input = null; this.inputAttached = false; } /** * Shorts the input path of this operator by providing the input tuple * directly * * @param t - * The tuple that should be used as input */ public void attachInput(Tuple t) { input = t; this.inputAttached = true; } public Result getNext(Object obj, byte dataType) throws ExecException { switch (dataType) { case DataType.BAG: return getNext((DataBag) obj); case DataType.BOOLEAN: return getNext((Boolean) obj); case DataType.BYTEARRAY: return getNext((DataByteArray) obj); case DataType.CHARARRAY: return getNext((String) obj); case DataType.DOUBLE: return getNext((Double) obj); case DataType.FLOAT: return getNext((Float) obj); case DataType.INTEGER: return getNext((Integer) obj); case DataType.LONG: return getNext((Long) obj); case DataType.MAP: return getNext((Map) obj); case DataType.TUPLE: return getNext((Tuple) obj); default: throw new ExecException("Unsupported type for getNext: " + DataType.findTypeName(dataType)); } } public Result getNext(Integer i) throws ExecException { return res; } public Result getNext(Long l) throws ExecException { return res; } public Result getNext(Double d) throws ExecException { return res; } public Result getNext(Float f) throws ExecException { return res; } public Result getNext(String s) throws ExecException { return res; } public Result getNext(DataByteArray ba) throws ExecException { return res; } public Result getNext(Map m) throws ExecException { return res; } public Result getNext(Boolean b) throws ExecException { return res; } public Result getNext(DataBag db) throws ExecException { Result ret = null; DataBag tmpBag = BagFactory.getInstance().newDefaultBag(); for(ret = getNext(dummyTuple);ret.returnStatus!=SOStatus.STATUS_EOP;ret=getNext(dummyTuple)){ if(ret.returnStatus == SOStatus.STATUS_ERR) { return ret; } tmpBag.add((Tuple)ret.result); } ret.result = tmpBag; ret.returnStatus = (tmpBag.size() == 0)? SOStatus.STATUS_EOP : SOStatus.STATUS_OK; return ret; } public static Object getDummy(byte dataType) throws ExecException { switch (dataType) { case DataType.BAG: return dummyBag; case DataType.BOOLEAN: return dummyBool; case DataType.BYTEARRAY: return dummyDBA; case DataType.CHARARRAY: return dummyString; case DataType.DOUBLE: return dummyDouble; case DataType.FLOAT: return dummyFloat; case DataType.INTEGER: return dummyFloat; case DataType.LONG: return dummyLong; case DataType.MAP: return dummyMap; case DataType.TUPLE: return dummyTuple; default: throw new ExecException("Unsupported type for getDummy: " + DataType.findTypeName(dataType)); } } protected void cloneHelper(PactOperator op) { resultType = op.resultType; } /** * Reset internal state in an operator. For use in nested pipelines * where operators like limit and sort may need to reset their state. * Limit needs it because it needs to know it's seeing a fresh set of * input. Blocking operators like sort and distinct need it because they * may not have drained their previous input due to a limit and thus need * to be told to drop their old input and start over. */ public void reset() { } public void setFirstKeyPosition(int col) { this.keyPosition1 = col; } public int getFirstKeyPosition() { return this.keyPosition1; } public void setSecondKeyPosition(int col) { this.keyPosition2 = col; } public int getSecondKeyPosition() { return this.keyPosition2; } }