/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.instructions.mr.ReblockInstruction;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.AdaptivePartialBlock;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.PartialBlock;
import org.apache.sysml.runtime.matrix.data.TaggedAdaptivePartialBlock;
import org.apache.sysml.runtime.util.MapReduceTool;
/**
 * Mapper that applies the reblock instructions configured for a job.
 *
 * <p>Every incoming value (a {@link MatrixBlock} or a {@link MatrixCell}) is
 * appended to a {@link ReblockBuffer} kept per reblock output; buffers are
 * flushed to the output collector when they reach capacity and once more in
 * {@link #close()}. On close, the mapper additionally emits its share of
 * placeholder records for empty blocks of those outputs that requested them.
 */
public class ReblockMapper extends MapperBase
    implements Mapper<Writable, Writable, Writable, Writable>
{
    //state of reblock mapper (collector and job conf are remembered for close)
    private OutputCollector<Writable, Writable> cachedCollector = null;
    private JobConf cachedJobConf = null;
    private HashMap<Byte, MatrixCharacteristics> dimensionsOut = new HashMap<Byte, MatrixCharacteristics>();
    private HashMap<Byte, MatrixCharacteristics> dimensionsIn = new HashMap<Byte, MatrixCharacteristics>();
    private HashMap<Byte, Boolean> emptyBlocks = new HashMap<Byte, Boolean>();

    //reblock buffers, keyed by reblock output and created on first use
    private HashMap<Byte, ReblockBuffer> buffer = new HashMap<Byte,ReblockBuffer>();
    private int buffersize = -1;

    /**
     * Remembers the collector for later use in {@link #close()} and delegates
     * the record to {@code commonMap}.
     *
     * @param rawKey input key
     * @param rawValue input value
     * @param out collector that receives the mapper output
     * @param reporter Hadoop progress reporter
     * @throws IOException if processing or collecting the record fails
     */
    @Override
    public void map(Writable rawKey, Writable rawValue, OutputCollector<Writable, Writable> out, Reporter reporter)
        throws IOException
    {
        cachedCollector = out;
        commonMap(rawKey, rawValue, out, reporter);
    }

    /**
     * Configures the mapper: reads the reblock instructions of the job,
     * records input/output matrix characteristics and the empty-block flag per
     * instruction, and derives the per-buffer size.
     *
     * @param job job configuration; also cached for use in {@link #close()}
     * @throws RuntimeException wrapping any exception raised while reading the
     *         reblock configuration
     */
    @Override
    public void configure(JobConf job)
    {
        MRJobConfiguration.setMatrixValueClass(job, false); //worst-case
        super.configure(job);

        //keep the job conf around, close() needs it
        cachedJobConf = job;

        try
        {
            //collect dimension information and empty-block flags
            for( ReblockInstruction rblk : MRJobConfiguration.getReblockInstructions(job) )
            {
                dimensionsIn.put(rblk.input, MRJobConfiguration.getMatrixCharacteristicsForInput(job, rblk.input));
                dimensionsOut.put(rblk.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, rblk.output));
                emptyBlocks.put(rblk.output, rblk.outputEmptyBlocks);
            }

            //buffer size is the default size divided by the largest number of
            //reblocks attached to a single input of this task (at least 1),
            //because those buffers share the JVM
            int maxReblocksPerInput = 1;
            for( ArrayList<ReblockInstruction> perInput : reblock_instructions )
            {
                int count = perInput.size();
                if( count > maxReblocksPerInput )
                    maxReblocksPerInput = count;
            }
            buffersize = ReblockBuffer.DEFAULT_BUFFER_SIZE / maxReblocksPerInput;
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the mapper: flushes all remaining buffered data and then emits
     * this task's share of empty-block placeholder records.
     *
     * <p>Empty-block output is skipped entirely if no job conf or no collector
     * has been cached (the collector is only set once {@code map} was invoked).
     *
     * @throws IOException if flushing or collecting fails
     */
    @Override
    public void close() throws IOException
    {
        super.close();

        //flush whatever is still buffered
        for( Entry<Byte,ReblockBuffer> pending : buffer.entrySet() )
            pending.getValue().flushBuffer(pending.getKey(), cachedCollector);

        //empty block output needs both the job conf and a collector
        if( cachedJobConf==null || cachedCollector==null )
            return;

        //responsibility for empty blocks is distributed over all map tasks
        long mapID = Long.parseLong(MapReduceTool.getUniqueKeyPerTask(cachedJobConf, true));
        long numMap = cachedJobConf.getNumMapTasks();

        //reusable key/value pair; the value wraps PartialBlock(-1,-1,0)
        //NOTE(review): presumably a marker the receiver treats as "no cell" - confirm
        MatrixIndexes tmpIx = new MatrixIndexes();
        TaggedAdaptivePartialBlock tmpVal = new TaggedAdaptivePartialBlock();
        tmpVal.setBaseObject(new AdaptivePartialBlock(new PartialBlock(-1,-1,0)));

        for( Entry<Byte, MatrixCharacteristics> output : dimensionsOut.entrySet() )
        {
            Byte tag = output.getKey();
            tmpVal.setTag(tag);

            MatrixCharacteristics mc = output.getValue();
            long rlen = mc.getRows();
            long clen = mc.getCols();
            long brlen = mc.getRowsPerBlock();
            long bclen = mc.getColsPerBlock();
            long nnz = mc.getNonZeros();

            //empty blocks are not required if nnz ensures that values exist in
            //each block, or if the instruction did not ask for them
            boolean everyBlockHasValues =
                nnz >= (rlen*clen - Math.min(brlen, rlen)*Math.min(bclen, clen) + 1);
            if( everyBlockHasValues || !emptyBlocks.get(tag) )
                continue;

            collectEmptyBlocks(mapID, numMap, rlen, clen, brlen, bclen, tmpIx, tmpVal);
        }
    }

    /**
     * Emits the placeholder record once per block index in the range of row
     * blocks assigned to this map task (all column blocks of each such row).
     *
     * @param mapID id of this map task
     * @param numMap number of map tasks the row blocks are divided among
     * @param rlen number of rows of the output matrix
     * @param clen number of columns of the output matrix
     * @param brlen rows per block
     * @param bclen columns per block
     * @param tmpIx reusable index object, overwritten for every emitted block
     * @param tmpVal tagged placeholder value to emit
     * @throws IOException if collecting a record fails
     */
    private void collectEmptyBlocks(long mapID, long numMap, long rlen, long clen,
        long brlen, long bclen, MatrixIndexes tmpIx, TaggedAdaptivePartialBlock tmpVal)
        throws IOException
    {
        //row-block range of this mapper: len consecutive row blocks per task
        long numBlocks = (long)Math.ceil((double)rlen/brlen);
        long len = (long)Math.ceil((double)numBlocks/numMap);
        long start = mapID * len * brlen;
        long end = Math.min((mapID+1) * len * brlen, rlen);

        long rowPos = start;
        long rowBlockIx = start/brlen + 1; //block indexes are 1-based
        while( rowPos < end )
        {
            long colPos = 0;
            long colBlockIx = 1;
            while( colPos < clen )
            {
                tmpIx.setIndexes(rowBlockIx, colBlockIx);
                cachedCollector.collect(tmpIx, tmpVal);
                colPos += bclen;
                colBlockIx++;
            }
            rowPos += brlen;
            rowBlockIx++;
        }
    }

    /**
     * Per-input hook: first applies the mapper instructions for the given
     * input, then the reblock instructions including output.
     *
     * @param index position of the input the current value belongs to
     * @param out collector that receives the mapper output
     * @param reporter Hadoop progress reporter
     * @throws IOException if processing or collecting fails
     */
    @Override
    protected void specialOperationsForActualMap(int index,
        OutputCollector<Writable, Writable> out, Reporter reporter)
        throws IOException
    {
        //note: invoked from MapperBase for each cell

        //apply all instructions
        processMapperInstructionsForMatrix(index);

        //apply reblock instructions and output
        processReblockInMapperAndOutput(index, out);
    }

    /**
     * Feeds the cached values of every reblock instruction registered for the
     * given input into the buffer of the instruction's output, flushing to
     * {@code out} when required.
     *
     * @param index position of the input whose reblock instructions are applied
     * @param out collector that receives flushed buffer content
     * @throws IOException if flushing a buffer fails
     */
    protected void processReblockInMapperAndOutput(int index, OutputCollector<Writable, Writable> out)
        throws IOException
    {
        for( ReblockInstruction ins : reblock_instructions.get(index) )
        {
            ArrayList<IndexedMatrixValue> inputs = cachedValues.get(ins.input);
            if( inputs == null )
                continue;

            for( IndexedMatrixValue current : inputs )
            {
                if( current == null )
                    continue;

                ReblockBuffer target = obtainBuffer(ins);
                MatrixValue payload = current.getValue();
                MatrixIndexes ix = current.getIndexes();

                if( !(payload instanceof MatrixBlock) )
                {
                    //cell input: append the single cell, then flush if full
                    target.appendCell( ix.getRowIndex(), ix.getColumnIndex(),
                        ((MatrixCell)payload).getValue() );
                    if( target.getSize() >= target.getCapacity() )
                        target.flushBuffer( ins.output, out );
                    continue;
                }

                //block input: translate the 1-based block index into the
                //1-based cell offset of the block's upper-left corner
                MatrixCharacteristics inDims = dimensionsIn.get(ins.input);
                long rowOffset = (ix.getRowIndex()-1)*inDims.getRowsPerBlock() + 1;
                long colOffset = (ix.getColumnIndex()-1)*inDims.getColsPerBlock() + 1;

                //append entire block incl. flush on demand
                target.appendBlock( rowOffset, colOffset, (MatrixBlock)payload, ins.output, out );
            }
        }
    }

    /**
     * Returns the buffer registered for the output of the given instruction,
     * creating and registering it on first use.
     *
     * @param ins reblock instruction whose output buffer is requested
     * @return the existing or newly created buffer
     */
    private ReblockBuffer obtainBuffer(ReblockInstruction ins)
    {
        ReblockBuffer existing = buffer.get(ins.output);
        if( existing != null )
            return existing;

        MatrixCharacteristics outDims = dimensionsOut.get(ins.output);
        ReblockBuffer created = new ReblockBuffer( buffersize, outDims.getRows(), outDims.getCols(), ins.brlen, ins.bclen );
        buffer.put(ins.output, created);
        return created;
    }
}