/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.matrix.mapred;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;

/**
 * Mapper of the MMCJ job. For the single aggregate binary instruction of the
 * job, it emits every non-empty block of the two instruction inputs under a
 * tagged index key: blocks of the first input are keyed by (column index,
 * row index) and blocks of the second input by (row index, column index).
 * NOTE(review): the first key component is presumably the dimension shared by
 * both operands of the multiplication - confirm against the reducer.
 */
public class MMCJMRMapper extends MapperBase
	implements Mapper<Writable, Writable, Writable, Writable>
{
	//the aggregate binary instruction for this mmcj job
	private AggregateBinaryInstruction aggBinInstruction;

	//temporary key object, reused for every emitted block
	private final TaggedFirstSecondIndexes taggedIndexes = new TaggedFirstSecondIndexes();

	//the tags to be output for the left and right matrices of the mmcj
	//(swapped in configure when the left input has more rows than the right has columns)
	private byte tagForLeft = 0;
	private byte tagForRight = 1;

	/**
	 * Delegates the processing of one input record to {@code commonMap}.
	 *
	 * @param rawKey input key as read from the input format
	 * @param rawValue input value as read from the input format
	 * @param out collector receiving the tagged output blocks
	 * @param reporter Hadoop progress reporter
	 * @throws IOException if {@code commonMap} fails
	 */
	@Override
	public void map(Writable rawKey, Writable rawValue, OutputCollector<Writable, Writable> out,
			Reporter reporter) throws IOException
	{
		commonMap(rawKey, rawValue, out, reporter);
	}

	/**
	 * Reads the aggregate binary instruction of this job from the job
	 * configuration and determines the tags of the left and right input.
	 *
	 * @param job the job configuration
	 * @throws RuntimeException if the instructions cannot be obtained from the
	 *         configuration, or if the job does not carry exactly one
	 *         aggregate binary instruction
	 */
	@Override
	public void configure(JobConf job)
	{
		super.configure(job);

		AggregateBinaryInstruction[] ins;
		try {
			ins = MRJobConfiguration.getAggregateBinaryInstructions(job);
		}
		catch (DMLRuntimeException e) {
			//configure cannot throw checked exceptions; keep the cause attached
			throw new RuntimeException(e);
		}
		if( ins.length != 1 ) {
			throw new RuntimeException("MMCJ only perform one aggregate binary instruction");
		}
		aggBinInstruction = ins[0];

		//decide which matrix needs to be cached for the cross product:
		//the tags are swapped if the left input has more rows than the right input has columns
		MatrixCharacteristics dim1 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, aggBinInstruction.input1);
		MatrixCharacteristics dim2 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, aggBinInstruction.input2);
		if( dim1.getRows() > dim2.getCols() ) {
			tagForLeft = 1;
			tagForRight = 0;
		}
	}

	/**
	 * Applies the mapper instructions for the given index and then emits the
	 * MMCJ inputs with their tags.
	 *
	 * @param index index passed on to {@code processMapperInstructionsForMatrix}
	 * @param out collector receiving the tagged output blocks
	 * @param reporter Hadoop progress reporter (not used here)
	 * @throws IOException if instruction processing or output collection fails
	 */
	@Override
	protected void specialOperationsForActualMap(int index, OutputCollector<Writable, Writable> out,
			Reporter reporter) throws IOException
	{
		//apply all instructions
		processMapperInstructionsForMatrix(index);

		//process the mapper part of MMCJ
		processMMCJInMapperAndOutput(aggBinInstruction, tagForLeft, tagForRight, taggedIndexes, out);
	}

	/**
	 * Emits all non-empty cached blocks of both inputs of the given
	 * instruction, first those of {@code input1}, then those of {@code input2}.
	 *
	 * @param aggBinInstruction instruction whose inputs are looked up in the cached values
	 * @param tagForLeft tag set on keys of blocks of the first input
	 * @param tagForRight tag set on keys of blocks of the second input
	 * @param taggedIndexes reusable key object, overwritten for every non-null block
	 * @param out collector receiving the key/block pairs
	 * @throws IOException if the collector fails
	 */
	protected void processMMCJInMapperAndOutput(AggregateBinaryInstruction aggBinInstruction,
			byte tagForLeft, byte tagForRight, TaggedFirstSecondIndexes taggedIndexes,
			OutputCollector<Writable, Writable> out) throws IOException
	{
		//output the key value pairs for the left matrix, keyed by (column, row)
		collectNonEmptyBlocks(cachedValues.get(aggBinInstruction.input1),
			tagForLeft, true, taggedIndexes, out);

		//output the key value pairs for the right matrix, keyed by (row, column)
		collectNonEmptyBlocks(cachedValues.get(aggBinInstruction.input2),
			tagForRight, false, taggedIndexes, out);
	}

	/**
	 * Emits every non-empty block of the given list under the given tag.
	 *
	 * @param blocks cached blocks of one input; may be null, in which case nothing is emitted
	 * @param tag tag to set on the key of each block
	 * @param swapIndexes if true the key is (column index, row index), otherwise (row index, column index)
	 * @param taggedIndexes reusable key object, overwritten for every non-null block
	 * @param out collector receiving the key/block pairs
	 * @throws IOException if the collector fails
	 */
	private static void collectNonEmptyBlocks(ArrayList<IndexedMatrixValue> blocks, byte tag,
			boolean swapIndexes, TaggedFirstSecondIndexes taggedIndexes,
			OutputCollector<Writable, Writable> out) throws IOException
	{
		if( blocks == null ) {
			return;
		}

		for( IndexedMatrixValue block : blocks ) {
			//Note: entries may be null, presumably due to cached list reuse after the first flush
			if( block == null ) {
				continue;
			}

			//the key is updated before the emptiness check, i.e., also for blocks that are not emitted
			taggedIndexes.setTag(tag);
			if( swapIndexes ) {
				taggedIndexes.setIndexes(block.getIndexes().getColumnIndex(),
					block.getIndexes().getRowIndex());
			}
			else {
				taggedIndexes.setIndexes(block.getIndexes().getRowIndex(),
					block.getIndexes().getColumnIndex());
			}

			//empty blocks are not emitted
			if( !((MatrixBlock) block.getValue()).isEmptyBlock() ) {
				out.collect(taggedIndexes, block.getValue());
			}
		}
	}
}