/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.lops.DataGen;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.compile.JobType;
import org.apache.sysml.lops.runtime.RunMRJobs;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.matrix.JobReturn;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.utils.Statistics;
/*
------------------------------------------------------------------------------
JobType      Rand  RecordReader  Mapper  Shuffle  AggInReducer  OtherInReducer
------------------------------------------------------------------------------
GMR                     *           *                  *              *
RAND          *                     *                  *              *
REBLOCK                             *        *                        *
MMCJ                                *        *
MMRJ                                *        *                        *
CM_COV                              *        *
GROUPED_AGG                                  *                        *
COMBINE                                      *
SORT                                *        *
PARTITION
------------------------------------------------------------------------------
*/
public class MRJobInstruction extends Instruction
{
//public enum JobType {MMCJ, MMRJ, GMR, Partition, RAND, ReBlock, SortKeys, Combine, CMCOV, GroupedAgg};
private JobType jobType;
private String _randInstructions = "";
private String _recordReaderInstructions = "";
private String _mapperInstructions = "";
private String _shuffleInstructions = "";
private String _aggInstructions = "";
private String _otherInstructions = "";
private String[] inputVars;
private String[] outputVars;
private byte [] _resultIndices;
private int iv_numReducers;
private int iv_replication;
private String dimsUnknownFilePrefix;
private double _mapperMem = -1;
/**
* This structure contains the DML script line number
* of each MR instruction within this MR job
*/
private ArrayList<Integer> MRJobInstructionsLineNumbers;
/*
* The following attributes are populated by pulling information out of the symbol table.
* This is done just before a job is submitted/spawned.
*/
private String[] inputs;
private InputInfo[] inputInfos;
private long[] rlens;
private long[] clens;
private int[] brlens;
private int[] bclens;
private String[] outputs;
private OutputInfo[] outputInfos;
// Member variables to store partitioning-related information for all input matrices
private boolean[] partitioned;
private PDataPartitionFormat[] pformats;
private int[] psizes;
/*
* These members store references to MatrixObjects corresponding to different
* MATRIX variables in inputVars and outputVars, respectively. Note that the
* references to SCALAR input variables are not stored in <code>inputMatrices</code>.
* Every reference in <code>outputMatrices</code> points to a MATRIX,
* since MR jobs always produce matrices.
*/
private MatrixObject[] inputMatrices, outputMatrices;
// Indicates the data type of inputVars
private DataType[] inputDataTypes;
public MRJobInstruction(JobType type)
{
setType(Instruction.INSTRUCTION_TYPE.MAPREDUCE_JOB);
jobType = type;
instOpcode = "MR-Job_"+getJobType();
}
/**
* (deep) Copy constructor, primarily used in parfor.
*
* @param that MR job instruction to copy
* @throws IllegalArgumentException if the given instruction cannot be copied
* @throws IllegalAccessException if a field of the given instruction cannot be accessed
*/
public MRJobInstruction(MRJobInstruction that)
throws IllegalArgumentException, IllegalAccessException
{
this( that.jobType );
//copy basic variables
_randInstructions = that._randInstructions;
_recordReaderInstructions = that._recordReaderInstructions;
_mapperInstructions = that._mapperInstructions;
_shuffleInstructions = that._shuffleInstructions;
_aggInstructions = that._aggInstructions;
_otherInstructions = that._otherInstructions;
iv_numReducers = that.iv_numReducers;
iv_replication = that.iv_replication;
dimsUnknownFilePrefix = that.dimsUnknownFilePrefix;
_mapperMem = that._mapperMem;
MRJobInstructionsLineNumbers = that.MRJobInstructionsLineNumbers;
//copy array variables (via clone)
inputVars = that.inputVars.clone();
outputVars = that.outputVars.clone();
_resultIndices = that._resultIndices.clone();
//copy all remaining attributes (these are overwritten by
//extractInputMatrices/extractOutputMatrices but we need to
//shallow copy them for special cases of runtime piggybacking)
inputs = that.inputs;
inputInfos = that.inputInfos;
rlens = that.rlens;
clens = that.clens;
brlens = that.brlens;
bclens = that.bclens;
outputs = that.outputs;
outputInfos = that.outputInfos;
partitioned = that.partitioned;
pformats = that.pformats;
psizes = that.psizes;
inputMatrices = that.inputMatrices;
outputMatrices = that.outputMatrices;
inputDataTypes = that.inputDataTypes;
}
public JobType getJobType()
{
return jobType;
}
public String getIv_instructionsInMapper()
{
return _mapperInstructions;
}
public void setIv_instructionsInMapper(String inst)
{
_mapperInstructions = inst;
}
public String getIv_recordReaderInstructions()
{
return _recordReaderInstructions;
}
public void setIv_recordReaderInstructions(String inst)
{
_recordReaderInstructions = inst;
}
public String getIv_randInstructions()
{
return _randInstructions;
}
public void setIv_randInstructions(String inst)
{
_randInstructions = inst;
}
public String getIv_shuffleInstructions()
{
return _shuffleInstructions;
}
public void setIv_shuffleInstructions(String inst)
{
_shuffleInstructions = inst;
}
public String getIv_aggInstructions()
{
return _aggInstructions;
}
public void setIv_aggInstructions(String inst)
{
_aggInstructions = inst;
}
public String getIv_otherInstructions()
{
return _otherInstructions;
}
public void setIv_otherInstructions(String inst)
{
_otherInstructions = inst;
}
public byte[] getIv_resultIndices()
{
return _resultIndices;
}
public int getIv_numReducers()
{
return iv_numReducers;
}
public int getIv_replication()
{
return iv_replication;
}
public double getMemoryRequirements(){
return _mapperMem;
}
public void setMemoryRequirements(double mem) {
_mapperMem = mem;
}
public String getDimsUnknownFilePrefix() {
return dimsUnknownFilePrefix;
}
public void setDimsUnknownFilePrefix(String prefix) {
dimsUnknownFilePrefix = prefix;
}
public String[] getInputVars()
{
return inputVars;
}
public String[] getOutputVars()
{
return outputVars;
}
/**
* Getter for MRJobInstructionsLineNumbers.
*
* @return list of DML script line numbers, one per MR instruction in this MR job
*/
public ArrayList<Integer> getMRJobInstructionsLineNumbers()
{
return MRJobInstructionsLineNumbers;
}
/**
* Method to set outputs (output indices) for a MapReduce instruction.
*
* @param outputIndices output indices
*/
public void setOutputs(byte[] outputIndices) {
_resultIndices = outputIndices;
}
/**
* Method to set the number of reducers for a MapReduce instruction.
* @param numReducers number of reducers
*/
public void setNumberOfReducers(int numReducers) {
iv_numReducers = numReducers;
}
/**
* Method to set the replication factor for outputs produced from a MapReduce instruction.
*
* @param replication replication factor
*/
public void setReplication(int replication) {
iv_replication = replication;
}
/**
* Method to set input and output labels for a MapReduce instruction.
*
* @param inputLabels input labels
* @param outputLabels output labels
*/
public void setInputOutputLabels(String[] inputLabels, String[] outputLabels) {
this.inputVars = inputLabels;
this.outputVars = outputLabels;
}
public void setRecordReaderInstructions(String rrInstructions) {
_recordReaderInstructions = rrInstructions;
}
public void setMapperInstructions(String mapperInstructions) {
_mapperInstructions = mapperInstructions;
}
public void setShuffleInstructions(String shuffleInstructions) {
_shuffleInstructions = shuffleInstructions;
}
public void setAggregateInstructionsInReducer(String aggInstructions) {
_aggInstructions = aggInstructions;
}
public void setOtherInstructionsInReducer(String otherInstructions) {
_otherInstructions = otherInstructions;
}
public void setRandInstructions(String randInstructions) {
_randInstructions = randInstructions;
}
/**
* Setter for the MRJobInstructionsLineNumbers field.
*
* @param MRJobLineNumbers line numbers for each MR instruction in this MR job
*/
public void setMRJobInstructionsLineNumbers(ArrayList<Integer> MRJobLineNumbers) {
MRJobInstructionsLineNumbers = MRJobLineNumbers;
}
public void setGMRInstructions(String[] inLabels,
String recordReaderInstructions, String mapperInstructions,
String aggInstructions, String otherInstructions, String [] outLabels, byte [] resultIndex,
int numReducers, int replication)
{
setOutputs(resultIndex);
setRecordReaderInstructions(recordReaderInstructions);
setMapperInstructions(mapperInstructions);
setShuffleInstructions("");
setAggregateInstructionsInReducer(aggInstructions);
setOtherInstructionsInReducer(otherInstructions);
setInputOutputLabels(inLabels, outLabels);
setNumberOfReducers(numReducers);
setReplication(replication);
}
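/*
* Illustrative sketch (not part of the original code): how a compiler component might
* assemble a GMR job via the setters above. All variable names and instruction strings
* below are hypothetical placeholders, not valid SystemML instruction syntax.
*
*   MRJobInstruction gmr = new MRJobInstruction(JobType.GMR);
*   gmr.setGMRInstructions(
*       new String[]{"A", "B"},   //input variable labels
*       "",                       //record reader instructions (none)
*       mapperInstStr,            //piggybacked mapper instructions
*       aggInstStr,               //aggregation instructions in the reducer
*       otherInstStr,             //other reducer instructions
*       new String[]{"C"},        //output variable labels
*       new byte[]{2},            //result indices into the job's byte-indexed matrix space
*       10, 1);                   //number of reducers, replication factor
*
* Before submission, extractInputMatrices/extractOutputMatrices resolve these labels
* against the symbol table and populate the auxiliary arrays (inputs, inputInfos, ...).
*/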
public void setReBlockInstructions(String[] inLabels,
String mapperInstructions, String reblockInstructions, String otherInstructions,
String[] outLabels, byte [] resultIndex,
int numReducers, int replication)
{
setOutputs(resultIndex);
setMapperInstructions(mapperInstructions);
setShuffleInstructions(reblockInstructions);
setAggregateInstructionsInReducer("");
setOtherInstructionsInReducer(otherInstructions);
setInputOutputLabels(inLabels, outLabels);
setNumberOfReducers(numReducers);
setReplication(replication);
}
/**
* Checks whether this MR job contains at least one MR instruction
* associated with the given DML script line number.
*
* @param lineNum line number in the DML script
* @return true if such an instruction is found, false otherwise
*/
public boolean findMRInstructions(int lineNum) {
if (!DMLScript.ENABLE_DEBUG_MODE) {
System.err.println("Error: Expecting debug mode to be enabled for this functionality");
return false;
}
for (Integer lineNumber : MRJobInstructionsLineNumbers) {
if (lineNum == lineNumber)
return true;
}
return false;
}
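/*
* Usage sketch (hypothetical, debugger integration only): checks whether this MR job
* covers a given breakpoint line; requires DMLScript.ENABLE_DEBUG_MODE to be enabled.
*
*   if( mrJobInst.findMRInstructions(breakpointLineNum) ) {
*       //at least one MR instruction of this job originates from that DML line
*   }
*/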
public String getString(byte [] arr)
{
StringBuilder sb = new StringBuilder();
for(int i = 0; i < arr.length; i++) {
sb.append(",");
sb.append(Byte.toString(arr[i]));
}
return sb.toString();
}
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append("jobtype = " + jobType + " \n");
sb.append("input labels = " + Arrays.toString(inputVars) + " \n");
sb.append("recReader inst = " + _recordReaderInstructions + " \n");
sb.append("rand inst = " + _randInstructions + " \n");
sb.append("mapper inst = " + _mapperInstructions + " \n");
sb.append("shuffle inst = " + _shuffleInstructions + " \n");
sb.append("agg inst = " + _aggInstructions + " \n");
sb.append("other inst = " + _otherInstructions + " \n");
sb.append("output labels = " + Arrays.toString(outputVars) + " \n");
sb.append("result indices = " + getString(_resultIndices) + " \n");
//sb.append("result dims unknown " + getString(iv_resultDimsUnknown) + " \n");
sb.append("num reducers = " + iv_numReducers + " \n");
sb.append("replication = " + iv_replication + " \n");
return sb.toString();
}
/**
* Method for displaying MR instructions interspersed with source code.
* ONLY USED IN DEBUG MODE.
*
* @param debug if true, print the prepared (delimiter-free) form of each instruction for
*              debugger integration; otherwise print raw instruction strings and the input/output labels
* @return string representation of this MR job
*/
public String getMRString(boolean debug)
{
if (!DMLScript.ENABLE_DEBUG_MODE) {
System.err.println("Error: Expecting debug mode to be enabled for this functionality");
return "";
}
StringBuilder sb = new StringBuilder();
sb.append("MR-Job[\n");
sb.append("\t\t\t\tjobtype = " + jobType + " \n");
if (!debug) {
sb.append("\t\t\t\tinput labels = ");
sb.append(Arrays.toString(inputVars));
sb.append(" \n");
}
if (_recordReaderInstructions.length() > 0) {
String [] instArray = _recordReaderInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug) {
sb.append("\t\t\t\trecReader inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\trecReader inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++)
{
if (!debug){
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (_randInstructions.length() > 0) {
String [] instArray = _randInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug) {
sb.append("\t\t\t\trand inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\trand inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++) {
if (!debug){
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (_mapperInstructions.length() > 0) {
String [] instArray = _mapperInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug){
sb.append("\t\t\t\tmapper inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\tmapper inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++)
{
if (!debug){
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (_shuffleInstructions.length() > 0) {
String [] instArray = _shuffleInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug) {
sb.append("\t\t\t\tshuffle inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\tshuffle inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++) {
if (!debug) {
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (_aggInstructions.length() > 0) {
String [] instArray = _aggInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug)
{
sb.append("\t\t\t\tagg inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\tagg inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++) {
if (!debug) {
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (_otherInstructions.length() > 0) {
String [] instArray = _otherInstructions.split(Lop.INSTRUCTION_DELIMITOR);
if (!debug)
{
sb.append("\t\t\t\tother inst = ");
sb.append(instArray[0]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[0]).split(" ");
sb.append("\t\t\t\tother inst = ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
for (int i = 1; i < instArray.length ; i++) {
if (!debug){
sb.append("\t\t\t\t ");
sb.append(instArray[i]);
sb.append(" \n");
}
else {
String [] instStr = prepareInstruction(instArray[i]).split(" ");
sb.append("\t\t\t\t ");
sb.append(instStr[0]);
sb.append(" ");
sb.append(instStr[1]);
sb.append(" \n");
}
}
}
if (!debug){
sb.append("\t\t\t\toutput labels = ");
sb.append(Arrays.toString(outputVars));
sb.append(" \n");
}
sb.append("\t\t\t ]");
return sb.toString();
}
public void printMe() {
LOG.debug("\nMRInstructions: \n" + this.toString());
}
private String getOps(String inst) {
StringBuilder sb = new StringBuilder();
for ( String i : inst.split(Lop.INSTRUCTION_DELIMITOR)) {
sb.append(",");
sb.append((i.split(Lop.OPERAND_DELIMITOR))[0]);
}
return sb.toString();
}
@Override
public String getGraphString() {
StringBuilder sb = new StringBuilder();
sb.append(jobType);
if (!_mapperInstructions.equals("")) {
sb.append(",map(");
sb.append(getOps(_mapperInstructions));
sb.append(")");
}
if (!_shuffleInstructions.equals("")) {
sb.append(",shuffle(");
sb.append(getOps(_shuffleInstructions));
sb.append(")");
}
if (!_aggInstructions.equals("")) {
sb.append(",agg(");
sb.append(getOps(_aggInstructions));
sb.append(")");
}
if (!_otherInstructions.equals("")) {
sb.append(",other(");
sb.append(getOps(_otherInstructions));
sb.append(")");
}
return sb.toString();
}
public boolean isMapOnly()
{
return ( (_shuffleInstructions == null || _shuffleInstructions.trim().length()==0)
&& (_aggInstructions == null || _aggInstructions.trim().length()==0)
&& (_otherInstructions == null || _otherInstructions.trim().length()==0) );
}
public String[] getInputs() {
return inputs;
}
public InputInfo[] getInputInfos() {
return inputInfos;
}
public long[] getRlens() {
return rlens;
}
public long[] getClens() {
return clens;
}
public int[] getBrlens() {
return brlens;
}
public int[] getBclens() {
return bclens;
}
public String[] getOutputs() {
return outputs;
}
public OutputInfo[] getOutputInfos() {
return outputInfos;
}
public MatrixObject[] getInputMatrices() {
return inputMatrices;
}
public boolean[] getPartitioned() {
return partitioned;
}
public void setPartitioned(boolean[] partitioned) {
this.partitioned = partitioned;
}
public PDataPartitionFormat[] getPformats() {
return pformats;
}
public void setPformats(PDataPartitionFormat[] pformats) {
this.pformats = pformats;
}
public int[] getPsizes() {
return psizes;
}
public void setPsizes(int[] psizes) {
this.psizes = psizes;
}
/**
* Extracts input variables with MATRIX data type and stores references to the
* corresponding matrix objects in <code>inputMatrices</code>; FRAME inputs are
* wrapped into matrix objects (meta data only) to fit the matrix-based MR job
* framework. Also stores the data types in <code>inputDataTypes</code>.
*
* @param ec execution context
* @return array of matrix objects
*/
public MatrixObject[] extractInputMatrices(ExecutionContext ec) {
ArrayList<MatrixObject> inputmat = new ArrayList<MatrixObject>();
inputDataTypes = new DataType[inputVars.length];
for ( int i=0; i < inputVars.length; i++ ) {
Data d = ec.getVariable(inputVars[i]);
inputDataTypes[i] = d.getDataType();
if ( d.getDataType() == DataType.MATRIX ) {
inputmat.add((MatrixObject) d);
}
else if( d.getDataType() == DataType.FRAME ) {
//FIXME conversion from frame to matrix object (meta data only) to adhere to
//the given matrix-based mr job submission framework
FrameObject fo = (FrameObject) d;
MatrixObject mo = new MatrixObject(fo.getValueType(), fo.getFileName(), fo.getMetaData());
mo.setFileFormatProperties(fo.getFileFormatProperties());
inputmat.add(mo);
}
}
inputMatrices = inputmat.toArray(new MatrixObject[inputmat.size()]);
// populate auxiliary data structures
populateInputs();
return inputMatrices;
}
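/*
* Sketch of the expected call sequence at job submission time (names are hypothetical;
* assumes the referenced variables are already bound in the execution context's symbol table):
*
*   MatrixObject[] in  = jobInst.extractInputMatrices(ec);   //MATRIX (and wrapped FRAME) inputs
*   MatrixObject[] out = jobInst.extractOutputMatrices(ec);  //MATRIX outputs
*   //afterwards, getInputs()/getInputInfos()/getRlens()/... reflect the extracted meta data
*/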
public MatrixObject[] getOutputMatrices() {
return outputMatrices;
}
/**
* Extracts MatrixObject references to output variables, all of which will be
* of MATRIX data type, and stores them in <code>outputMatrices</code>. Also,
* populates auxiliary data structures.
*
* @param ec execution context
* @return array of matrix objects
* @throws DMLRuntimeException if an output variable has a non-matrix data type
*/
public MatrixObject[] extractOutputMatrices(ExecutionContext ec) throws DMLRuntimeException {
outputMatrices = new MatrixObject[getOutputVars().length];
int ind = 0;
for(String oo: getOutputVars()) {
Data d = ec.getVariable(oo);
if ( d.getDataType() == DataType.MATRIX ) {
outputMatrices[ind++] = (MatrixObject)d;
}
else {
throw new DMLRuntimeException(getJobType() + ": invalid datatype (" + d.getDataType() + ") for output variable " + oo);
}
}
// populate auxiliary data structures
populateOutputs();
return outputMatrices;
}
/**
* Populates the auxiliary data structures required to spawn MR jobs by pulling
* information out of the symbol table; more specifically, out of the information
* stored in <code>inputMatrices</code>.
*/
private void populateInputs() {
// Since inputVars can potentially contain scalar variables,
// auxiliary data structures of size <code>inputMatrices.length</code>
// are allocated instead of size <code>inputVars.length</code>
// Allocate space
inputs = new String[inputMatrices.length];
inputInfos = new InputInfo[inputMatrices.length];
rlens = new long[inputMatrices.length];
clens = new long[inputMatrices.length];
brlens = new int[inputMatrices.length];
bclens = new int[inputMatrices.length];
partitioned = new boolean[inputMatrices.length];
pformats = new PDataPartitionFormat[inputMatrices.length];
psizes = new int[inputMatrices.length];
// populate information
for ( int i=0; i < inputMatrices.length; i++ ) {
inputs[i] = inputMatrices[i].getFileName();
MatrixCharacteristics mc = inputMatrices[i].getMatrixCharacteristics();
rlens[i] = mc.getRows();
clens[i] = mc.getCols();
brlens[i] = mc.getRowsPerBlock();
bclens[i] = mc.getColsPerBlock();
if ( inputMatrices[i].getMetaData() instanceof MatrixFormatMetaData ) {
inputInfos[i] = ((MatrixFormatMetaData) inputMatrices[i].getMetaData()).getInputInfo();
}
else if (inputMatrices[i].getMetaData() instanceof NumItemsByEachReducerMetaData ) {
inputInfos[i] = InputInfo.InputInfoForSortOutput;
inputInfos[i].metadata = inputMatrices[i].getMetaData();
}
partitioned[i] = inputMatrices[i].isPartitioned();
pformats[i] = inputMatrices[i].getPartitionFormat();
psizes[i] = inputMatrices[i].getPartitionSize();
}
}
/**
* Pulls out information from symbol table for output variables (i.e., outputMatrices)
* and populates auxiliary data structures that are used in setting up MR jobs.
*/
private void populateOutputs() {
// Note: (outputVars.length == outputMatrices.length) -> true
// Allocate space
outputs = new String[outputVars.length];
outputInfos = new OutputInfo[outputVars.length];
// Populate information
for(int i=0; i < outputVars.length; i++) {
outputs[i] = outputMatrices[i].getFileName();
MatrixFormatMetaData md = (MatrixFormatMetaData) outputMatrices[i].getMetaData();
outputInfos[i] = md.getOutputInfo();
}
}
/**
* Prepares the given instruction for printing by replacing
* internal delimiters with readable separators.
*
* @param inst instruction to be displayed
* @return post-processed instruction in string format
*/
private static String prepareInstruction(String inst) {
String tmp = inst;
tmp = tmp.replaceAll(Lop.OPERAND_DELIMITOR, " ");
tmp = tmp.replaceAll(Lop.DATATYPE_PREFIX, ".");
tmp = tmp.replaceAll(Lop.INSTRUCTION_DELIMITOR, ", ");
return tmp;
}
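/*
* Illustrative example (schematic): writing the operand delimiter as <op> and the data type
* prefix as <dt> (the concrete characters are defined in Lop), an instruction such as
*
*   MR<op>opcode<op>0<dt>MATRIX<op>1<dt>MATRIX
*
* is rendered by prepareInstruction as
*
*   MR opcode 0.MATRIX 1.MATRIX
*
* i.e., operand delimiters become spaces, data type prefixes become dots, and instruction
* delimiters become ", ", which is the form consumed by getMRString in debug mode.
*/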
public void printCompleteMRJobInstruction(MatrixCharacteristics[] resultStats) throws DMLRuntimeException {
LOG.trace("jobtype" + jobType);
LOG.trace("Inputs: \n");
for(int i=0, mi=0; i < inputVars.length; i++ ) {
if(inputDataTypes[i] == DataType.SCALAR) {
LOG.trace(" " + inputVars[i] + " - SCALAR input (replaced w/ value)");
}
else if ( inputDataTypes[i] == DataType.MATRIX ) {
LOG.trace(" " + inputVars[i] +
" - [" + inputs[mi] +
"] [" + rlens[mi] + ", " + clens[mi] +
"] nnz[" + inputMatrices[mi].getNnz() +
"] block[" + brlens[mi] + ", " + bclens[mi] +
"] [" + InputInfo.inputInfoToString(inputInfos[mi]) +
"]");
mi++;
}
else
LOG.trace(" " + inputVars[i] + " - " + inputDataTypes[i]);
}
LOG.trace(" Instructions:");
if ( !_recordReaderInstructions.equals(""))
LOG.trace(" recReader inst - " + _recordReaderInstructions );
if ( !_randInstructions.equals(""))
LOG.trace(" rand inst - " + _randInstructions );
if ( !_mapperInstructions.equals(""))
LOG.trace(" mapper inst - " + _mapperInstructions );
if ( !_shuffleInstructions.equals(""))
LOG.trace(" shuffle inst - " + _shuffleInstructions );
if ( !_aggInstructions.equals(""))
LOG.trace(" agg inst - " + _aggInstructions );
if ( !_otherInstructions.equals(""))
LOG.trace(" other inst - " + _otherInstructions );
LOG.trace(" Outputs:");
for(int i=0; i < outputVars.length; i++ ) {
LOG.trace(" " + _resultIndices[i] + " : " + outputVars[i] +
" - [" + outputs[i] +
"] [" + resultStats[i].getRows() + ", " + resultStats[i].getCols() +
"] nnz[" + outputMatrices[i].getNnz() +
"] block[" + resultStats[i].getRows() + ", " + resultStats[i].getColsPerBlock() +
"] [" + OutputInfo.outputInfoToString(outputInfos[i]) +
"]");
}
LOG.trace(" #Reducers - " + iv_numReducers);
LOG.trace(" Replication - " + iv_replication);
}
@Override
public void updateInstructionThreadID(String pattern, String replace)
throws DMLRuntimeException
{
if( dimsUnknownFilePrefix!=null )
dimsUnknownFilePrefix = dimsUnknownFilePrefix.replaceAll(pattern, replace);
if( getJobType() == JobType.DATAGEN )
{
//update string representation (because parsing might fail due to pending instruction patching)
String rndinst = getIv_randInstructions().trim();
StringBuilder rndinst2 = new StringBuilder();
if( rndinst!=null && rndinst.length()>0 )
{
String[] instSet = rndinst.split( Lop.INSTRUCTION_DELIMITOR );
for( String dginst : instSet )
{
if( rndinst2.length()>0 )
rndinst2.append(Lop.INSTRUCTION_DELIMITOR);
//handle single instruction
String[] parts = dginst.split(Lop.OPERAND_DELIMITOR);
if( parts==null || parts.length<2 )
throw new DMLRuntimeException("Invalid datagen instruction: "+dginst);
int pos = -1;
if( parts[1].equals(DataGen.RAND_OPCODE) ) pos = 13;
if( parts[1].equals(DataGen.SEQ_OPCODE) ) pos = 11;
if( pos>0 )
{
StringBuilder sb = new StringBuilder();
for( int i=0; i<parts.length; i++ )
{
if( i>0 )
sb.append(Lop.OPERAND_DELIMITOR);
if( i==pos )
sb.append(ProgramConverter.saveReplaceFilenameThreadID(parts[i], pattern, replace));
else
sb.append(parts[i]);
}
rndinst2.append( sb.toString() );
}
else
rndinst2.append( dginst );
}
setRandInstructions(rndinst2.toString());
}
}
}
public boolean isMergableMRJobInstruction( MRJobInstruction that )
{
boolean ret = true;
//check compatible job type (just in case it's called with wrong assumptions)
if( jobType != that.jobType ) {
ret = false;
}
//check consistent input representation (otherwise inputs are forced into a common cell representation)
boolean blockedThis = MRJobConfiguration.deriveRepresentation(inputInfos);
boolean blockedThat = MRJobConfiguration.deriveRepresentation(that.inputInfos);
if( blockedThis != blockedThat ) {
ret = false;
}
//check max memory requirements of mapper instructions
if( (_mapperMem + that._mapperMem)
> OptimizerUtils.getRemoteMemBudgetMap(true) )
{
ret = false;
}
//check max possible byte indexes (worst-case: no sharing)
int maxIx1 = UtilFunctions.max(_resultIndices);
int maxIx2 = UtilFunctions.max(that._resultIndices);
if( (maxIx1+maxIx2) > Byte.MAX_VALUE ) {
ret = false;
}
//TODO conceptually this check should not be necessary
//check map only jobs versus full map-reduce jobs
if( isMapOnly() != that.isMapOnly() ) {
ret = false;
}
return ret;
}
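/*
* Usage sketch (hypothetical): runtime piggybacking merges two queued MR jobs only if they
* pass the compatibility checks above.
*
*   if( job1.isMergableMRJobInstruction(job2) )
*       job1.mergeMRJobInstruction(job2);  //job1 now carries the inputs, outputs, and
*                                          //instruction strings of both jobs
*/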
public void mergeMRJobInstruction( MRJobInstruction that )
{
if( LOG.isDebugEnabled() ){
LOG.debug("Current instruction:\n"+this.toString());
LOG.debug("Next instruction:\n"+that.toString());
}
//compute offsets (inputs1, inputs2, intermediates1, intermediates2, outputs1, outputs2)
byte maxIxInst1 = UtilFunctions.max(_resultIndices);
byte maxIxInst2 = UtilFunctions.max(that._resultIndices);
byte sharedIx = 0;
//compute input index map (based on distinct filenames)
HashMap<String, Byte> inMap = new HashMap<String, Byte>();
for( int i=0; i<inputs.length; i++ )
inMap.put(inputs[i], (byte) i);
//compute shared input indexes
for( int i=0; i<that.inputs.length; i++ )
if( inMap.containsKey(that.inputs[i]) )
sharedIx++;
byte lenInputs = (byte)(inputs.length + that.inputs.length - sharedIx);
//compute transition index map for instruction 1
HashMap<Byte, Byte> transMap1 = new HashMap<Byte,Byte>();
for( int i=0; i<inputs.length; i++ )
transMap1.put((byte)i, (byte)i);
for( int i=inputs.length; i<=maxIxInst1; i++ ) //remap intermediates and outputs of instruction 1
{
transMap1.put((byte)i, (byte)(that.inputs.length-sharedIx+i));
}
//compute transition index map for instruction 2
HashMap<Byte, Byte> transMap2 = new HashMap<Byte,Byte>();
byte nextIX = (byte)inputs.length;
for( int i=0; i<that.inputs.length; i++ ) {
if( !inMap.containsKey(that.inputs[i]) )
inMap.put(that.inputs[i], nextIX++);
transMap2.put((byte)i, inMap.get(that.inputs[i]));
}
nextIX = (byte) (lenInputs + (maxIxInst1+1 - inputs.length));
for( int i=that.inputs.length; i<=maxIxInst2; i++ )
{
transMap2.put((byte)i, (byte)nextIX++);
}
//construct merged inputs and meta data
int llen = lenInputs; int len = inputs.length;
int olen = outputs.length+that.outputs.length;
String[] linputs = new String[llen];
InputInfo[] linputInfos = new InputInfo[llen];
MatrixObject[] linputMatrices = new MatrixObject[llen];
PDataPartitionFormat[] lpformats = new PDataPartitionFormat[llen];
long[] lrlens = new long[llen];
long[] lclens = new long[llen];
int[] lbrlens = new int[llen];
int[] lbclens = new int[llen];
String[] loutputs = new String[olen];
OutputInfo[] loutputInfos = new OutputInfo[olen];
MatrixObject[] loutputMatrices = new MatrixObject[olen];
byte[] lresultIndexes = new byte[olen];
System.arraycopy(inputs, 0, linputs, 0, len);
System.arraycopy(inputInfos, 0, linputInfos, 0, len);
System.arraycopy(inputMatrices, 0, linputMatrices, 0, len);
System.arraycopy(pformats, 0, lpformats, 0, len);
System.arraycopy(rlens, 0, lrlens, 0, len);
System.arraycopy(clens, 0, lclens, 0, len);
System.arraycopy(brlens, 0, lbrlens, 0, len);
System.arraycopy(bclens, 0, lbclens, 0, len);
System.arraycopy(outputs, 0, loutputs, 0, outputs.length);
System.arraycopy(outputInfos, 0, loutputInfos, 0, outputs.length);
System.arraycopy(outputMatrices, 0, loutputMatrices, 0, outputs.length);
for( int i=0; i<that.inputs.length; i++ ){
byte ixSrc = (byte) i;
byte ixTgt = transMap2.get((byte)i);
linputs[ixTgt] = that.inputs[ixSrc];
linputInfos[ixTgt] = that.inputInfos[ixSrc];
linputMatrices[ixTgt] = that.inputMatrices[ixSrc];
lpformats[ixTgt] = that.pformats[ixSrc];
lrlens[ixTgt] = that.rlens[ixSrc];
lclens[ixTgt] = that.clens[ixSrc];
lbrlens[ixTgt] = that.brlens[ixSrc];
lbclens[ixTgt] = that.bclens[ixSrc];
}
for( int i=0; i<_resultIndices.length; i++ )
lresultIndexes[i] = transMap1.get(_resultIndices[i]);
for( int i=0; i<that._resultIndices.length; i++ ){
loutputs[_resultIndices.length+i] = that.outputs[i];
loutputInfos[_resultIndices.length+i] = that.outputInfos[i];
loutputMatrices[_resultIndices.length+i] = that.outputMatrices[i];
lresultIndexes[_resultIndices.length+i] = transMap2.get(that._resultIndices[i]);
}
inputs = linputs; inputInfos = linputInfos; inputMatrices = linputMatrices;
pformats = lpformats;
outputs = loutputs; outputInfos = loutputInfos; outputMatrices = loutputMatrices;
rlens = lrlens; clens = lclens; brlens = lbrlens; bclens = lbclens;
_resultIndices = lresultIndexes;
//replace merged instructions with all transition map entries
String randInst1 = replaceInstructionStringWithTransMap(this.getIv_randInstructions(), transMap1);
String randInst2 = replaceInstructionStringWithTransMap(that.getIv_randInstructions(), transMap2);
String rrInst1 = replaceInstructionStringWithTransMap(this.getIv_recordReaderInstructions(), transMap1);
String rrInst2 = replaceInstructionStringWithTransMap(that.getIv_recordReaderInstructions(), transMap2);
String mapInst1 = replaceInstructionStringWithTransMap(this.getIv_instructionsInMapper(), transMap1);
String mapInst2 = replaceInstructionStringWithTransMap(that.getIv_instructionsInMapper(), transMap2);
String shuffleInst1 = replaceInstructionStringWithTransMap(this.getIv_shuffleInstructions(), transMap1);
String shuffleInst2 = replaceInstructionStringWithTransMap(that.getIv_shuffleInstructions(), transMap2);
String aggInst1 = replaceInstructionStringWithTransMap(this.getIv_aggInstructions(), transMap1);
String aggInst2 = replaceInstructionStringWithTransMap(that.getIv_aggInstructions(), transMap2);
String otherInst1 = replaceInstructionStringWithTransMap(this.getIv_otherInstructions(), transMap1);
String otherInst2 = replaceInstructionStringWithTransMap(that.getIv_otherInstructions(), transMap2);
//concatenate instructions
setIv_randInstructions( concatenateInstructions(randInst1, randInst2) );
setIv_recordReaderInstructions( concatenateInstructions(rrInst1, rrInst2) );
setIv_instructionsInMapper( concatenateInstructions(mapInst1, mapInst2) );
setIv_shuffleInstructions( concatenateInstructions(shuffleInst1, shuffleInst2) );
setIv_aggInstructions( concatenateInstructions(aggInst1, aggInst2) );
setIv_otherInstructions( concatenateInstructions(otherInst1, otherInst2) );
//merge memory requirements
_mapperMem = _mapperMem + that._mapperMem;
LOG.debug("Merged instruction:\n"+this.toString());
}
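/*
* Worked example of the index remapping above (hypothetical inputs): assume this job has
* inputs {A, B} (indexes 0,1) and result index 2, and 'that' has inputs {B, C} (indexes 0,1)
* and result index 2. Input B is shared, so sharedIx=1 and the merged input list is
* {A, B, C} with lenInputs=3. The transition maps then become
*
*   transMap1: 0->0, 1->1, 2->3   (result of job 1 shifted past job 2's new input C)
*   transMap2: 0->1, 1->2, 2->4   (B maps onto the shared slot; C and the result of job 2
*                                  get the next free slots)
*
* and all instruction strings are rewritten with these maps before concatenation.
*/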
/**
* Safe replacement of MR indexes based on the transition map. Sequential string
* replacements would fail for crossing transitions, e.g., 1->2 and 2->1.
*
* @param inst instruction string
* @param transMap transition map
* @return result string
*/
private String replaceInstructionStringWithTransMap( String inst, HashMap<Byte,Byte> transMap )
{
//prevent unnecessary parsing and reconstruction
if( inst == null || inst.isEmpty() || transMap.isEmpty() )
return inst;
String[] pinst = inst.split(Lop.INSTRUCTION_DELIMITOR);
StringBuilder instOut = new StringBuilder();
for( String lpinst : pinst ){ //for each instruction
//split instruction into parts
String[] parts = InstructionUtils.getInstructionPartsWithValueType(lpinst);
//replace instruction parts
for( int i=0; i<parts.length; i++ )
{
String lpart = parts[i];
int pos = lpart.indexOf(Instruction.DATATYPE_PREFIX+DataType.MATRIX.toString());
if( pos>0 ){
String index = lpart.substring(0, pos);
String newindex = String.valueOf(transMap.get(Byte.parseByte(index)));
parts[i] = newindex + lpart.substring(pos);
}
}
if( instOut.length()>0 )
instOut.append(Lop.INSTRUCTION_DELIMITOR);
//reconstruct instruction
instOut.append("MR");
for( String lpart : parts ){
instOut.append(Lop.OPERAND_DELIMITOR);
instOut.append(lpart);
}
}
return instOut.toString();
}
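/*
* Why plain string replacement would be unsafe here (illustrative): with a transition map
* {1->2, 2->1}, applying the replacements sequentially would first turn every index 1 into 2
* and then turn all 2s (including the just-created ones) into 1, collapsing both operands onto
* index 1. Splitting each instruction into parts and mapping each matrix index exactly once,
* as done above, avoids this.
*/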
private String concatenateInstructions(String inst1, String inst2)
{
boolean emptyInst1 = (inst1 == null || inst1.length()==0);
boolean emptyInst2 = (inst2 == null || inst2.length()==0);
String ret = "";
if( !emptyInst1 && !emptyInst2 )
ret = inst1 + Lop.INSTRUCTION_DELIMITOR + inst2;
else if( !emptyInst1 )
ret = inst1;
else if( !emptyInst2 )
ret = inst2;
return ret;
}
@Override
public void processInstruction(ExecutionContext ec)
throws DMLRuntimeException
{
if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE)
throw new DMLRuntimeException("MapReduce jobs cannot be executed when execution mode = singlenode");
//execute MR job
JobReturn jb = RunMRJobs.prepareAndSubmitJob(this, ec);
//specific post processing
if ( getJobType() == JobType.SORT && jb.getMetaData().length > 0 )
{
/* Populate returned stats into symbol table of matrices */
for ( int index=0; index < jb.getMetaData().length; index++) {
String varname = getOutputVars()[index];
ec.setMetaData(varname, jb.getMetaData()[index]);
}
}
else if ( jb.getMetaData().length > 0 )
{
/* Populate returned stats into symbol table of matrices */
for ( int index=0; index < jb.getMetaData().length; index++) {
String varname = getOutputVars()[index];
MatrixCharacteristics mc = ((MatrixDimensionsMetaData)jb.getMetaData(index)).getMatrixCharacteristics();
ec.getVariable(varname).updateMatrixCharacteristics(mc);
}
}
Statistics.incrementNoOfExecutedMRJobs();
}
}