/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *  */ package com.ibm.bi.dml.runtime.matrix.mapred; import java.io.IOException; import java.util.ArrayList; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import com.ibm.bi.dml.runtime.instructions.mr.GroupedAggregateInstruction; import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock; import com.ibm.bi.dml.runtime.matrix.data.MatrixIndexes; import com.ibm.bi.dml.runtime.matrix.data.MatrixValue; import com.ibm.bi.dml.runtime.matrix.data.TaggedInt; import com.ibm.bi.dml.runtime.matrix.data.WeightedCell; public class GroupedAggMRMapper extends MapperBase implements Mapper<MatrixIndexes, MatrixValue, TaggedInt, WeightedCell> { //block instructions that need to be performed in part by mapper protected ArrayList<ArrayList<GroupedAggregateInstruction>> groupAgg_instructions=new ArrayList<ArrayList<GroupedAggregateInstruction>>(); private IntWritable outKeyValue=new IntWritable(); private TaggedInt outKey=new TaggedInt(outKeyValue, (byte)0); private WeightedCell outValue=new WeightedCell(); @Override public void map(MatrixIndexes key, MatrixValue value, OutputCollector<TaggedInt, WeightedCell> out, Reporter reporter) throws IOException { for(int i=0; i<representativeMatrixes.size(); i++) for(GroupedAggregateInstruction ins : groupAgg_instructions.get(i)) { //set the tag once for the block outKey.setTag(ins.output); //get block and unroll into weighted cells //(it will be in dense format) MatrixBlock block = (MatrixBlock) value; int rlen = block.getNumRows(); int clen = block.getNumColumns(); if( clen == 2 ) //w/o weights { for( int r=0; r<rlen; r++ ) { outKeyValue.set((int)block.quickGetValue(r, 1)); outValue.setValue(block.quickGetValue(r, 0)); outValue.setWeight(1); out.collect(outKey, outValue); } } else //w/ weights { for( int r=0; r<rlen; r++ ) { outKeyValue.set((int)block.quickGetValue(r, 1)); outValue.setValue(block.quickGetValue(r, 0)); outValue.setWeight(block.quickGetValue(r, 2)); out.collect(outKey, outValue); } } } } @Override protected void specialOperationsForActualMap(int index, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException { } @Override public void configure(JobConf job) { super.configure(job); try { GroupedAggregateInstruction[] grpaggIns = MRJobConfiguration.getGroupedAggregateInstructions(job); if( grpaggIns == null ) throw new RuntimeException("no GroupAggregate Instructions found!"); ArrayList<GroupedAggregateInstruction> vec = new ArrayList<GroupedAggregateInstruction>(); for(int i=0; i<representativeMatrixes.size(); i++) { byte input=representativeMatrixes.get(i); for(GroupedAggregateInstruction ins : grpaggIns) if(ins.input == input) vec.add(ins); groupAgg_instructions.add(vec); } } catch (Exception e) { throw new RuntimeException(e); } } }