/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
import org.apache.sysml.runtime.instructions.mr.ReblockInstruction;
import org.apache.sysml.runtime.matrix.CSVReblockMR.BlockRow;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
public class CSVReblockReducer extends ReduceBase implements Reducer<TaggedFirstSecondIndexes, BlockRow, MatrixIndexes, MatrixBlock>
{
@Override
public void reduce(TaggedFirstSecondIndexes key, Iterator<BlockRow> values,
OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter)
throws IOException
{
long start=System.currentTimeMillis();
commonSetup(reporter);
cachedValues.reset();
//process the reducer part of the reblock operation
processCSVReblock(key, values, dimensions);
//perform mixed operations
processReducerInstructions();
//output results
outputResultsFromCachedValues(reporter);
reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis()-start);
}
protected void processCSVReblock(TaggedFirstSecondIndexes indexes, Iterator<BlockRow> values,
HashMap<Byte, MatrixCharacteristics> dimensions) throws IOException
{
try
{
Byte tag=indexes.getTag();
//there only one block in the cache for this output
IndexedMatrixValue block=cachedValues.getFirst(tag);
while(values.hasNext())
{
BlockRow row=values.next();
if(block==null)
{
block=cachedValues.holdPlace(tag, valueClass);
int brlen=dimensions.get(tag).getRowsPerBlock();
int bclen=dimensions.get(tag).getColsPerBlock();
int realBrlen=(int)Math.min((long)brlen, dimensions.get(tag).getRows()-(indexes.getFirstIndex()-1)*brlen);
int realBclen=(int)Math.min((long)bclen, dimensions.get(tag).getCols()-(indexes.getSecondIndex()-1)*bclen);
block.getValue().reset(realBrlen, realBclen, false);
block.getIndexes().setIndexes(indexes.getFirstIndex(), indexes.getSecondIndex());
}
MatrixBlock mb = (MatrixBlock) block.getValue();
mb.copy(row.indexInBlock, row.indexInBlock, 0, row.data.getNumColumns()-1, row.data, false);
}
((MatrixBlock) block.getValue()).recomputeNonZeros();
}
catch(DMLRuntimeException ex)
{
throw new IOException(ex);
}
}
@Override
public void configure(JobConf job)
{
MRJobConfiguration.setMatrixValueClass(job, true);
super.configure(job);
//parse the reblock instructions
CSVReblockInstruction[] reblockInstructions;
try {
reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
for(ReblockInstruction ins: reblockInstructions)
dimensions.put(ins.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, ins.output));
}
}