/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.runtime.instructions.spark;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaPairRDD;
import com.ibm.bi.dml.hops.recompile.Recompiler;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject;
import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContext;
import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext;
import com.ibm.bi.dml.runtime.instructions.Instruction;
import com.ibm.bi.dml.runtime.instructions.InstructionUtils;
import com.ibm.bi.dml.runtime.instructions.cp.CPOperand;
import com.ibm.bi.dml.runtime.instructions.spark.data.RDDProperties;
import com.ibm.bi.dml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock;
import com.ibm.bi.dml.runtime.instructions.spark.utils.RDDAggregateUtils;
import com.ibm.bi.dml.runtime.instructions.spark.utils.RDDConverterUtils;
import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics;
import com.ibm.bi.dml.runtime.matrix.MatrixFormatMetaData;
import com.ibm.bi.dml.runtime.matrix.data.InputInfo;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.MatrixCell;
import com.ibm.bi.dml.runtime.matrix.data.MatrixIndexes;
import com.ibm.bi.dml.runtime.matrix.operators.Operator;
public class ReblockSPInstruction extends UnarySPInstruction
{
private int brlen;
private int bclen;
private boolean outputEmptyBlocks;
public ReblockSPInstruction(Operator op, CPOperand in, CPOperand out, int br, int bc, boolean emptyBlocks,
String opcode, String instr)
{
super(op, in, out, opcode, instr);
brlen=br;
bclen=bc;
outputEmptyBlocks = emptyBlocks;
}
public static Instruction parseInstruction(String str) throws DMLRuntimeException
{
String parts[] = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
if(opcode.compareTo("rblk") != 0) {
throw new DMLRuntimeException("Incorrect opcode for ReblockSPInstruction:" + opcode);
}
CPOperand in = new CPOperand(parts[1]);
CPOperand out = new CPOperand(parts[2]);
int brlen=Integer.parseInt(parts[3]);
int bclen=Integer.parseInt(parts[4]);
boolean outputEmptyBlocks = Boolean.parseBoolean(parts[5]);
Operator op = null; // no operator for ReblockSPInstruction
return new ReblockSPInstruction(op, in, out, brlen, bclen, outputEmptyBlocks, opcode, str);
}
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec)
throws DMLRuntimeException, DMLUnsupportedOperationException
{
SparkExecutionContext sec = (SparkExecutionContext)ec;
//set the output characteristics
MatrixObject mo = sec.getMatrixObject(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(mc.getRows(), mc.getCols(), brlen, bclen, mc.getNonZeros());
//get the source format form the meta data
MatrixFormatMetaData iimd = (MatrixFormatMetaData) mo.getMetaData();
if(iimd == null) {
throw new DMLRuntimeException("Error: Metadata not found");
}
//check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
if( Recompiler.checkCPReblock(sec, input1.getName()) ) {
Recompiler.executeInMemoryReblock(sec, input1.getName(), output.getName());
return;
}
if(iimd.getInputInfo() == InputInfo.TextCellInputInfo || iimd.getInputInfo() == InputInfo.MatrixMarketInputInfo )
{
//check jdk version (prevent double.parseDouble contention on <jdk8)
sec.checkAndRaiseValidationWarningJDKVersion();
//get the input textcell rdd
JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>)
sec.getRDDHandleForVariable(input1.getName(), iimd.getInputInfo());
//convert textcell to binary block
JavaPairRDD<MatrixIndexes, MatrixBlock> out =
RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
else if(iimd.getInputInfo() == InputInfo.CSVInputInfo) {
// HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
// throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
RDDProperties properties = mo.getRddProperties();
CSVReblockSPInstruction csvInstruction = null;
boolean hasHeader = false;
String delim = ",";
boolean fill = false;
double missingValue = 0;
if(properties != null) {
hasHeader = properties.isHasHeader();
delim = properties.getDelim();
fill = properties.isFill();
missingValue = properties.getMissingValue();
}
csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, missingValue, "csvrblk", instString);
csvInstruction.processInstruction(sec);
return;
}
else if(iimd.getInputInfo()==InputInfo.BinaryCellInputInfo)
{
JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iimd.getInputInfo());
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
else if(iimd.getInputInfo()==InputInfo.BinaryBlockInputInfo)
{
/// HACK ALERT: Workaround for MLContext
if(mc.getRowsPerBlock() == mcOut.getRowsPerBlock() && mc.getColsPerBlock() == mcOut.getColsPerBlock()) {
if(mo.getRDDHandle() != null) {
JavaPairRDD<MatrixIndexes, MatrixBlock> out = (JavaPairRDD<MatrixIndexes, MatrixBlock>) mo.getRDDHandle().getRDD();
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
return;
}
else {
throw new DMLRuntimeException("Input RDD is not accessible through buffer pool for ReblockSPInstruction:" + iimd.getInputInfo());
}
}
else
{
//BINARY BLOCK <- BINARY BLOCK (different sizes)
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> out =
in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
out = RDDAggregateUtils.mergeByKey( out );
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
}
else {
throw new DMLRuntimeException("The given InputInfo is not implemented for ReblockSPInstruction:" + iimd.getInputInfo());
}
}
}