/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *  */ package com.ibm.bi.dml.hops.rewrite; import java.util.ArrayList; import com.ibm.bi.dml.hops.DataOp; import com.ibm.bi.dml.hops.Hop; import com.ibm.bi.dml.hops.Hop.DataOpTypes; import com.ibm.bi.dml.hops.Hop.FileFormatTypes; import com.ibm.bi.dml.hops.Hop.VisitStatus; import com.ibm.bi.dml.hops.HopsException; import com.ibm.bi.dml.parser.DataIdentifier; import com.ibm.bi.dml.parser.StatementBlock; import com.ibm.bi.dml.parser.VariableSet; /** * Rule: Split Hop DAG after CSV reads with unknown size. This is * important to create recompile hooks if format is read from mtd * (we are not able to split it on statementblock creation) and * mtd has unknown size (which can only happen for CSV). * */ public class RewriteSplitDagUnknownCSVRead extends StatementBlockRewriteRule { @Override public ArrayList<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) throws HopsException { ArrayList<StatementBlock> ret = new ArrayList<StatementBlock>(); //collect all unknown csv reads hops ArrayList<Hop> cand = new ArrayList<Hop>(); collectCSVReadHopsUnknownSize( sb.get_hops(), cand ); //split hop dag on demand if( !cand.isEmpty() ) { try { //duplicate sb incl live variable sets StatementBlock sb1 = new StatementBlock(); sb1.setDMLProg(sb.getDMLProg()); sb1.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn()); sb1.setLiveIn(new VariableSet()); sb1.setLiveOut(new VariableSet()); //move csv reads incl reblock to new statement block //(and replace original persistent read with transient read) ArrayList<Hop> sb1hops = new ArrayList<Hop>(); for( Hop c : cand ) { Hop reblock = c; long rlen = reblock.getDim1(); long clen = reblock.getDim2(); long nnz = reblock.getNnz(); long brlen = reblock.getRowsInBlock(); long bclen = reblock.getColsInBlock(); //create new transient read DataOp tread = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, brlen, bclen); HopRewriteUtils.copyLineNumbers(reblock, tread); //replace reblock with transient read ArrayList<Hop> parents = new ArrayList<Hop>(reblock.getParent()); for( int i=0; i<parents.size(); i++ ) { Hop parent = parents.get(i); int pos = HopRewriteUtils.getChildReferencePos(parent, reblock); HopRewriteUtils.removeChildReferenceByPos(parent, reblock, pos); HopRewriteUtils.addChildReference(parent, tread, pos); } //add reblock sub dag to first statement block DataOp twrite = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), reblock, DataOpTypes.TRANSIENTWRITE, null); twrite.setOutputParams(rlen, clen, nnz, brlen, bclen); HopRewriteUtils.copyLineNumbers(reblock, twrite); sb1hops.add(twrite); //update live in and out of new statement block (for piggybacking) DataIdentifier diVar = sb.variablesRead().getVariable(reblock.getName()); if( diVar != null ){ //var read should always exist because persistent read sb1.liveOut().addVariable(reblock.getName(), new DataIdentifier(diVar)); sb.liveIn().addVariable(reblock.getName(), new DataIdentifier(diVar)); } } sb1.set_hops(sb1hops); sb1.updateRecompilationFlag(); ret.add(sb1); //statement block with csv reblocks ret.add(sb); //statement block with remaining hops } catch(Exception ex) { throw new HopsException("Failed to split hops dag for csv read with unknown size.", ex); } LOG.debug("Applied splitDagUnknownCSVRead."); } //keep original hop dag else { ret.add(sb); } return ret; } /** * * @param roots * @param cand */ private void collectCSVReadHopsUnknownSize( ArrayList<Hop> roots, ArrayList<Hop> cand ) { if( roots == null ) return; Hop.resetVisitStatus(roots); for( Hop root : roots ) collectCSVReadHopsUnknownSize(root, cand); } /** * * @param root * @param cand */ private void collectCSVReadHopsUnknownSize( Hop hop, ArrayList<Hop> cand ) { if( hop.getVisited() == VisitStatus.DONE ) return; //collect persistent reads (of type csv, with unknown size) if( hop instanceof DataOp ) { DataOp dop = (DataOp) hop; if( dop.getDataOpType() == DataOpTypes.PERSISTENTREAD && dop.getInputFormatType() == FileFormatTypes.CSV && !dop.dimsKnown() && !HopRewriteUtils.hasOnlyWriteParents(dop, true, false) && !HopRewriteUtils.hasTransformParents(hop) ) { cand.add(dop); } } //process children if( hop.getInput()!=null ) for( Hop c : hop.getInput() ) collectCSVReadHopsUnknownSize(c, cand); hop.setVisited(VisitStatus.DONE); } }