/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.mr.CombineBinaryInstruction;
import org.apache.sysml.runtime.instructions.mr.CombineTernaryInstruction;
import org.apache.sysml.runtime.instructions.mr.MRInstruction;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock;
import org.apache.sysml.runtime.matrix.data.TaggedMatrixCell;
import org.apache.sysml.runtime.matrix.data.TaggedMatrixValue;
import org.apache.sysml.runtime.matrix.data.WeightedPair;
import org.apache.sysml.runtime.matrix.mapred.GMRMapper;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups;
import org.apache.sysml.runtime.matrix.mapred.ReduceBase;
import org.apache.sysml.runtime.util.UtilFunctions;
public class CombineMR
{
private static final Log LOG = LogFactory.getLog(CombineMR.class.getName());
private CombineMR() {
//prevent instantiation via private constructor
}
public static class InnerReducer extends ReduceBase
	implements Reducer<MatrixIndexes, TaggedMatrixValue, MatrixIndexes, WeightedPair>
{
	//combine instructions to execute for every reduce group
	protected MRInstruction[] comb_instructions = null;

	//reused key/value buffers to avoid per-cell object allocation
	private MatrixIndexes keyBuff = new MatrixIndexes();
	private WeightedPair valueBuff = new WeightedPair();

	//block sizes (rows per block, cols per block) per instruction output tag
	private HashMap<Byte, Pair<Integer, Integer>> outputBlockSizes = new HashMap<Byte, Pair<Integer, Integer>>();
	//result output positions per instruction output tag
	private HashMap<Byte, ArrayList<Integer>> outputIndexesMapping = new HashMap<Byte, ArrayList<Integer>>();

	/**
	 * Caches all tagged input values of the current group and applies the
	 * configured combine instructions, emitting (cell index, weighted pair)
	 * records to the final multiple outputs.
	 */
	@Override
	public void reduce(MatrixIndexes indexes,
			Iterator<TaggedMatrixValue> values,
			OutputCollector<MatrixIndexes, WeightedPair> out, Reporter reporter)
		throws IOException
	{
		long start = System.currentTimeMillis();

		if( firsttime ) {
			cachedReporter = reporter;
			firsttime = false;
		}

		//cache all values of this group (deep copy) for instruction processing
		cachedValues.reset();
		while( values.hasNext() ) {
			TaggedMatrixValue taggedValue = values.next();
			cachedValues.set(taggedValue.getTag(), indexes, taggedValue.getBaseObject(), true);
		}

		processCombineInstructionsAndOutput(reporter);

		reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis()-start);
	}

	/**
	 * Initializes the reducer: parses the combine instructions from the job
	 * configuration and precomputes per-output block sizes as well as the
	 * output index mapping per instruction output.
	 */
	@Override
	public void configure(JobConf job)
	{
		super.configure(job);
		try {
			comb_instructions = MRJobConfiguration.getCombineInstruction(job);
		}
		catch (Exception e) {
			throw new RuntimeException(e);
		}

		//precompute block sizes per result output
		for( int i=0; i<resultIndexes.length; i++ ) {
			MatrixCharacteristics stat = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, resultIndexes[i]);
			outputBlockSizes.put(resultIndexes[i],
				new Pair<Integer, Integer>(stat.getRowsPerBlock(), stat.getColsPerBlock()));
		}

		//precompute output index mapping per instruction output
		for( MRInstruction ins : comb_instructions )
			outputIndexesMapping.put(ins.output, getOutputIndexes(ins.output));
	}

	/**
	 * Dispatches each combine instruction to its binary or ternary handler.
	 *
	 * @param reporter MR reporter for output collection
	 * @throws IOException if an unsupported instruction type is encountered
	 */
	void processCombineInstructionsAndOutput(Reporter reporter)
		throws IOException
	{
		for( MRInstruction ins : comb_instructions ) {
			if( ins instanceof CombineBinaryInstruction )
				processBinaryCombineInstruction((CombineBinaryInstruction)ins, reporter);
			else if( ins instanceof CombineTernaryInstruction )
				processTernaryCombineInstruction((CombineTernaryInstruction)ins, reporter);
			else
				throw new IOException("unsupported instruction: "+ins);
		}
	}

	/**
	 * Combines three aligned inputs (value, other value, weight) into one
	 * weighted pair per cell and emits each pair to all mapped outputs.
	 * A missing input is treated as an all-zero block of matching dimensions.
	 */
	private void processTernaryCombineInstruction(CombineTernaryInstruction ins, Reporter reporter)
		throws IOException
	{
		IndexedMatrixValue in1 = cachedValues.getFirst(ins.input1);
		IndexedMatrixValue in2 = cachedValues.getFirst(ins.input2);
		IndexedMatrixValue in3 = cachedValues.getFirst(ins.input3);
		if( in1==null && in2==null && in3==null )
			return;

		//derive dimensions from the first available input
		int nr=0, nc=0;
		if( in1!=null ) {
			nr = in1.getValue().getNumRows();
			nc = in1.getValue().getNumColumns();
		}
		else if( in2!=null ) {
			nr = in2.getValue().getNumRows();
			nc = in2.getValue().getNumColumns();
		}
		else {
			nr = in3.getValue().getNumRows();
			nc = in3.getValue().getNumColumns();
		}

		//if one of the inputs is missing, it represents an all-zero block
		if( in1==null ) {
			in1 = zeroInput;
			in1.getValue().reset(nr, nc);
		}
		if( in2==null ) {
			in2 = zeroInput;
			in2.getValue().reset(nr, nc);
		}
		if( in3==null ) {
			in3 = zeroInput;
			in3.getValue().reset(nr, nc);
		}

		//process instruction
		try {
			ArrayList<Integer> outputIndexes = outputIndexesMapping.get(ins.output);
			//loop-invariant: constant per instruction output, hoisted out of the cell loop
			Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output);
			for( int r=0; r<nr; r++ )
				for( int c=0; c<nc; c++ ) {
					keyBuff.setIndexes(
						UtilFunctions.computeCellIndex(in1.getIndexes().getRowIndex(), blockSize.getKey(), r),
						UtilFunctions.computeCellIndex(in1.getIndexes().getColumnIndex(), blockSize.getValue(), c) );
					valueBuff.setValue(in1.getValue().getValue(r, c));
					valueBuff.setOtherValue(in2.getValue().getValue(r, c));
					valueBuff.setWeight(in3.getValue().getValue(r, c));
					for( int i : outputIndexes )
						collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, i, reporter);
				}
		}
		catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Combines two aligned inputs into one weighted pair per cell. The second
	 * input supplies either the weight (other value 0) or the other value
	 * (weight 1), depending on {@code ins.isSecondInputWeight()}. A missing
	 * input is treated as an all-zero block of matching dimensions.
	 */
	private void processBinaryCombineInstruction(CombineBinaryInstruction ins, Reporter reporter)
		throws IOException
	{
		IndexedMatrixValue in1 = cachedValues.getFirst(ins.input1);
		IndexedMatrixValue in2 = cachedValues.getFirst(ins.input2);
		if( in1==null && in2==null )
			return;

		MatrixIndexes indexes = (in1!=null) ? in1.getIndexes() : in2.getIndexes();

		//if one of the inputs is missing, it represents an all-zero block
		if( in1==null ) {
			in1 = zeroInput;
			in1.getValue().reset(in2.getValue().getNumRows(), in2.getValue().getNumColumns());
		}
		if( in2==null ) {
			in2 = zeroInput;
			in2.getValue().reset(in1.getValue().getNumRows(), in1.getValue().getNumColumns());
		}

		//process instruction
		try {
			ArrayList<Integer> outputIndexes = outputIndexesMapping.get(ins.output);
			//loop-invariants: constant per instruction output / block, hoisted out of the cell loop
			Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output);
			int nr = in1.getValue().getNumRows();
			int nc = in1.getValue().getNumColumns();
			for( int r=0; r<nr; r++ )
				for( int c=0; c<nc; c++ ) {
					keyBuff.setIndexes(
						UtilFunctions.computeCellIndex(indexes.getRowIndex(), blockSize.getKey(), r),
						UtilFunctions.computeCellIndex(indexes.getColumnIndex(), blockSize.getValue(), c) );
					valueBuff.setValue(in1.getValue().getValue(r, c));
					double temp = in2.getValue().getValue(r, c);
					if( ins.isSecondInputWeight() ) {
						valueBuff.setWeight(temp);
						valueBuff.setOtherValue(0);
					}
					else {
						valueBuff.setWeight(1);
						valueBuff.setOtherValue(temp);
					}
					for( int i : outputIndexes )
						collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, i, reporter);
				}
		}
		catch (Exception e) {
			throw new RuntimeException(e);
		}
	}
}
/**
 * Configures and runs the combine MR job, which aligns multiple input
 * matrices cell-wise and emits weighted pairs via {@link InnerReducer}.
 *
 * @param inst MR job instruction (used for trace output only)
 * @param inputs input file paths, one per input matrix
 * @param inputInfos input format information, aligned with {@code inputs}
 * @param rlens number of rows per input
 * @param clens number of columns per input
 * @param brlens number of rows per block, per input
 * @param bclens number of columns per block, per input
 * @param combineInstructions combine instruction string executed in the reducer
 * @param numReducers requested number of reducers
 * @param replication HDFS replication factor for the results
 * @param resultIndexes byte tags of the result matrices
 * @param outputs output file paths, aligned with {@code resultIndexes}
 * @param outputInfos output format information, aligned with {@code outputs}
 * @return job return with result statistics and success flag
 * @throws Exception if job configuration or execution fails
 */
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
	long[] rlens, long[] clens, int[] brlens, int[] bclens, String combineInstructions,
	int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos)
	throws Exception
{
	JobConf job = new JobConf(CombineMR.class);
	job.setJobName("Standalone-MR");

	//whether to use block representation or cell representation
	boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);
	MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);

	//inputs are tagged with their position: 0, 1, 2, ...
	byte[] inputIndexes = new byte[inputs.length];
	for( byte b=0; b<inputs.length; b++ )
		inputIndexes[b] = b;

	//set up the input files and their format information
	MRJobConfiguration.setUpMultipleInputs(job, inputIndexes, inputs, inputInfos, brlens, bclens,
		true, inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);

	//set up the dimensions of input matrices
	MRJobConfiguration.setMatricesDimensions(job, inputIndexes, rlens, clens);

	//set up the block size
	MRJobConfiguration.setBlocksSizes(job, inputIndexes, brlens, bclens);

	//no mapper/aggregate/reducer instructions for this job; only combine
	//instructions are executed (in the reducer)
	MRJobConfiguration.setInstructionsInMapper(job, "");
	MRJobConfiguration.setAggregateInstructions(job, "");
	MRJobConfiguration.setInstructionsInReducer(job, "");
	MRJobConfiguration.setCombineInstructions(job, combineInstructions);

	//set up the replication factor for the results
	job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

	//set up custom map/reduce configurations
	DMLConfig config = ConfigurationManager.getDMLConfig();
	MRJobConfiguration.setupCustomMRConfigurations(job, config);

	//set up what matrices are needed to pass from the mapper to reducer
	HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, inputIndexes,
		null, null, combineInstructions, resultIndexes);

	//set up the multiple output files, and their format information
	MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, null, outputs, outputInfos, inBlockRepresentation);

	//configure mapper and the mapper output key value pairs
	job.setMapperClass(GMRMapper.class);
	job.setMapOutputKeyClass(MatrixIndexes.class);
	if( inBlockRepresentation )
		job.setMapOutputValueClass(TaggedMatrixBlock.class);
	else
		job.setMapOutputValueClass(TaggedMatrixCell.class);

	//configure reducer
	job.setReducerClass(InnerReducer.class);

	//compute output characteristics and reducer groups
	MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, inputIndexes,
		null, null, null, combineInstructions, resultIndexes, mapoutputIndexes, false);
	MatrixCharacteristics[] stats = ret.stats;

	//set up the number of reducers
	MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

	//print the complete instruction
	if( LOG.isTraceEnabled() )
		inst.printCompleteMRJobInstruction(stats);

	//set unique working dir
	MRJobConfiguration.setUniqueWorkingDir(job);

	//execute job and construct job return
	RunningJob runjob = JobClient.runJob(job);
	return new JobReturn(stats, runjob.isSuccessful());
}
}