package org.apache.pig.backend.stratosphere.executionengine.contractsLayer; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.io.LineInFormat; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.io.LineOutFormat; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.plans.ScalarPactFinder; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.plans.UDFFinder; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.stubs.PigStubs.PigCrossStub; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.stubs.PigStubs.PigMapStub; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.stubs.PigStubs.PigReduceStub; import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.stubs.PigStubs.PigMatchStub; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.PactOperator; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.plans.PactPlan; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.plans.PactPlanVisitor; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOCoGroup; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOCross; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOFilter; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOLoad; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOMatch; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOReduce; import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOStore; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.impl.plan.DepthFirstWalker; import org.apache.pig.impl.plan.NodeIdGenerator; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.UDFContext; import org.apache.pig.PigException; import eu.stratosphere.pact.common.contract.Contract; import eu.stratosphere.pact.common.contract.CrossContract; import eu.stratosphere.pact.common.contract.FileDataSink; import eu.stratosphere.pact.common.contract.FileDataSource; import eu.stratosphere.pact.common.contract.GenericDataSink; import eu.stratosphere.pact.common.contract.MapContract; import eu.stratosphere.pact.common.contract.MatchContract; import eu.stratosphere.pact.common.contract.ReduceContract; import eu.stratosphere.pact.common.type.base.PactString; public class PactCompiler extends PactPlanVisitor { PactPlan plan; Contract[] compiledInputs = null; private Map<PactOperator, Contract> pactOpToContract; private String scope; private Random r; NodeIdGenerator nig; private static final Log log = LogFactory.getLog(PactLauncher.class); PactOperator curOp; //the current operator being compiled private UDFFinder udfFinder; private GenericDataSink sink; PigContext pc; public PactCompiler(PactPlan plan, PigContext pc, UDFContext udfCon) { super(plan, new DepthFirstWalker<PactOperator, PactPlan>(plan)); this.plan = plan; this.pc = pc; UDFContext.setUdfContext(udfCon); compiledInputs = new Contract[plan.size()]; //each PactOperator will become one InputContract nig = NodeIdGenerator.getGenerator(); r = new Random(1331); FileLocalizer.setR(r); List<PactOperator> roots = plan.getRoots(); if((roots == null) || (roots.size() <= 0)) { String msg = "Internal error. Did not find roots in the pact plan."; log.info(msg); } scope = roots.get(0).getOperatorKey().getScope(); pactOpToContract = new HashMap<PactOperator, Contract>(); udfFinder = new UDFFinder(); } public void compilePlan() { try { this.visit(); } catch (VisitorException e) { e.printStackTrace(); } } @Override public void visitLoad(SOLoad ld) throws VisitorException{ try{ curOp = ld; String inputLocation = ld.getInputFile(); ld.setPc(this.pc); FileDataSource source = new FileDataSource(LineInFormat.class, inputLocation, "Pig Source:-"+inputLocation); pactOpToContract.put(ld, source); }catch(Exception e){ int errCode = 2034; String msg = "Error compiling operator " + ld.getClass().getSimpleName(); throw new VisitorException(msg, errCode, PigException.BUG, e); } } @Override public void visitFilter(SOFilter soFilter) throws VisitorException { curOp = soFilter; try{ processUDFs(soFilter.getPlan()); }catch(Exception e){ int errCode = 2034; String msg = "Error compiling operator " + soFilter.getClass().getSimpleName(); throw new VisitorException(msg, errCode, PigException.BUG, e); } List<PactOperator> preds = soFilter.getInputs(); if (!preds.isEmpty()){ MapContract mapper = MapContract.builder(PigMapStub.class) .name("Filter Mapper") .build(); //send parameters to the map stub through the Configuration try { mapper.getParameters().setString("pactOp", ObjectSerializer.serialize(soFilter)); } catch (IOException e) { e.printStackTrace(); } pactOpToContract.put(soFilter, mapper); } } @Override public void visitCross(SOCross soCross) { curOp = soCross; List<PactOperator> preds = soCross.getInputs(); if (!preds.isEmpty()){ CrossContract cross = CrossContract.builder(PigCrossStub.class) .name("cross") .build(); //send parameters to the match stub through the Configuration try { cross.getParameters().setString("pactOp", ObjectSerializer.serialize(soCross)); } catch (IOException e) { e.printStackTrace(); } pactOpToContract.put(soCross, cross); } } @Override public void visitStore(SOStore st) throws VisitorException{ try{ String out = st.getSFile().getFileName(); List<PactOperator> preds = st.getInputs(); if (!preds.isEmpty()){ FileDataSink sink = new FileDataSink(LineOutFormat.class, out, "Pig Data Sink"); pactOpToContract.put(st, sink); //NOTE: this assumes that we only allow one STORE command in the script // should be generalized this.sink = sink; } }catch(Exception e){ int errCode = 2034; String msg = "Error compiling operator " + st.getClass().getSimpleName(); throw new VisitorException(msg, errCode, PigException.BUG, e); } } @Override public void visitMatch(SOMatch soMatch) { curOp = soMatch; List<PactOperator> preds = soMatch.getInputs(); if (!preds.isEmpty()){ MatchContract match = MatchContract.builder(PigMatchStub.class, PactString.class, soMatch.getFirstKeyPosition(), soMatch.getSecondKeyPosition()) .name("match") .build(); //send parameters to the match stub through the Configuration try { match.getParameters().setString("pactOp", ObjectSerializer.serialize(soMatch)); } catch (IOException e) { e.printStackTrace(); } pactOpToContract.put(soMatch, match); } } @Override public void visitReduce(SOReduce soReduce) { curOp = soReduce; List<PactOperator> preds = soReduce.getInputs(); if (!preds.isEmpty()){ //create the Reduce Input Contract ReduceContract reducer = ReduceContract.builder(PigReduceStub.class, PactString.class, soReduce.getFirstKeyPosition()) .name("Reducer") .build(); //send parameters to the reduce stub through the Configuration try { reducer.getParameters().setString("pactOp", ObjectSerializer.serialize(soReduce)); } catch (IOException e) { e.printStackTrace(); } pactOpToContract.put(soReduce, reducer); } } @Override public void visitCoGroup(SOCoGroup soCoGroup) { } private void processUDFs(PactPlan plan) throws VisitorException{ if(plan!=null){ //Process Scalar UDFs with referencedOperators ScalarPactFinder scalarPactFinder = new ScalarPactFinder(plan); scalarPactFinder.visit(); curOp.scalars.addAll(scalarPactFinder.getScalars()); //Process UDFs udfFinder.setPlan(plan); udfFinder.visit(); curOp.UDFs.addAll(udfFinder.getUDFs()); } } public GenericDataSink getSink(){ Set<Entry<PactOperator, Contract>> ops = pactOpToContract.entrySet(); Iterator<Entry<PactOperator, Contract>> iter = ops.iterator(); while(iter.hasNext()){ Entry<PactOperator, Contract> entry = iter.next(); Contract curCon = entry.getValue(); PactOperator curOp = entry.getKey(); List<PactOperator> preds = curOp.getInputs(); List<Contract> inputs = new ArrayList<Contract>(); //Map Contract if (curCon.getClass() == MapContract.class){ if (!preds.isEmpty()){ for(PactOperator po : preds){ inputs.add(pactOpToContract.get(po)); } } MapContract map = (MapContract)curCon; map.addInput(inputs.get(0)); } //Reducer Contract if (curCon.getClass() == ReduceContract.class){ if (!preds.isEmpty()){ for(PactOperator po : preds){ inputs.add(pactOpToContract.get(po)); } } ReduceContract reduce = (ReduceContract)curCon; reduce.addInput(inputs.get(0)); } //Match Contract if (curCon.getClass() == MatchContract.class){ if (!preds.isEmpty()){ for(PactOperator po : preds){ inputs.add(pactOpToContract.get(po)); } } MatchContract match = (MatchContract)curCon; match.addFirstInput(inputs.get(0)); match.addSecondInput(inputs.get(1)); } //Cross Contract if (curCon.getClass() == CrossContract.class){ if (!preds.isEmpty()){ for(PactOperator po : preds){ inputs.add(pactOpToContract.get(po)); } } CrossContract cross = (CrossContract)curCon; cross.addFirstInput(inputs.get(0)); cross.addSecondInput(inputs.get(1)); } //Sink if (curCon == this.sink){ if (!preds.isEmpty()){ for(PactOperator po : preds){ inputs.add(pactOpToContract.get(po)); } } this.sink.addInput(inputs.get(0)); } } return sink; } }