/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.sysml.runtime.matrix.mapred; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction; import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction; import org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction; import org.apache.sysml.runtime.instructions.mr.MRInstruction; import org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction; import org.apache.sysml.runtime.instructions.mr.ReblockInstruction; import org.apache.sysml.runtime.matrix.data.Converter; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.MatrixIndexes; import org.apache.sysml.runtime.matrix.data.MatrixValue; import org.apache.sysml.runtime.matrix.data.Pair; @SuppressWarnings("rawtypes") public abstract class MapperBase extends MRBaseForCommonInstructions { //the indexes that this particular input matrix file represents protected ArrayList<Byte> representativeMatrixes=null; //the dimension for all the representative matrices //(they are all the same, since coming from the same files) protected long[] rlens=null; protected long[] clens=null; //the block sizes for the representative matrices protected int[] brlens=null; protected int[] bclens=null; //upper boundaries to check protected long[] rbounds=null; protected long[] cbounds=null; //boundary block sizes protected int[] lastblockrlens=null; protected int[] lastblockclens=null; //rand instructions that need to be performed in mapper protected ArrayList<DataGenMRInstruction> dataGen_instructions=new ArrayList<DataGenMRInstruction>(); //instructions that need to be performed in mapper protected ArrayList<ArrayList<MRInstruction>> mapper_instructions=new ArrayList<ArrayList<MRInstruction>>(); //block instructions that need to be performed in part by mapper protected ArrayList<ArrayList<ReblockInstruction>> reblock_instructions=new ArrayList<ArrayList<ReblockInstruction>>(); //csv block instructions that need to be performed in part by mapper protected ArrayList<ArrayList<CSVReblockInstruction>> csv_reblock_instructions=new ArrayList<ArrayList<CSVReblockInstruction>>(); //the indexes of the matrices that needed to be outputted protected ArrayList<ArrayList<Byte>> outputIndexes=new ArrayList<ArrayList<Byte>>(); //converter to convert the input record into indexes and matrix value (can be a cell or a block) protected Converter inputConverter=null; //a counter to measure the time spent in a mapper protected static enum Counters { MAP_TIME }; @SuppressWarnings("unchecked") protected void commonMap(Writable rawKey, Writable rawValue, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException { long start=System.currentTimeMillis(); //System.out.println("read in Mapper: "+rawKey+": "+rawValue); //for each representative matrix, read the record and apply instructions for(int i=0; i<representativeMatrixes.size(); i++) { byte thisMatrix=representativeMatrixes.get(i); //convert the record into the right format for the representative matrix inputConverter.setBlockSize(brlens[i], bclens[i]); inputConverter.convert(rawKey, rawValue); //apply unary instructions on the converted indexes and values while(inputConverter.hasNext()) { Pair<MatrixIndexes, MatrixValue> pair = inputConverter.next(); MatrixIndexes indexes=pair.getKey(); MatrixValue value=pair.getValue(); checkValidity(indexes, value, i); //put the input in the cache cachedValues.reset(); cachedValues.set(thisMatrix, indexes, value); //special operations for individual mapp type specialOperationsForActualMap(i, out, reporter); } } reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis()-start); } protected abstract void specialOperationsForActualMap(int index, OutputCollector<Writable, Writable> out, Reporter reporter)throws IOException; protected void checkValidity(MatrixIndexes indexes, MatrixValue value, int rep) throws IOException { if(indexes.getRowIndex()<=0 || indexes.getColumnIndex()<=0 || indexes.getRowIndex()>rbounds[rep] || indexes.getColumnIndex()>cbounds[rep]){ throw new IOException("key: "+indexes+" is out of range: [1, "+rbounds[rep]+"] and [1, "+cbounds[rep]+"] (tag="+rep+")!"); } if(indexes.getRowIndex()==rbounds[rep] && value.getNumRows()>lastblockrlens[rep]) { throw new IOException("boundary block with "+value.getNumRows()+" rows exceeds the size "+lastblockrlens[rep]+" " + "(tag="+rep+", ix="+indexes+", "+value.getNumRows()+"x"+value.getNumColumns()+")"); } if(indexes.getColumnIndex()==cbounds[rep] && value.getNumColumns()>lastblockclens[rep]) { throw new IOException("boundary block with "+value.getNumColumns()+" columns exceeds the size "+lastblockclens[rep]+" " + "(tag="+rep+", ix="+indexes+", "+value.getNumRows()+"x"+value.getNumColumns()+")"); } } /** * Determines if empty blocks can be discarded on map input. Conceptually, this is true * if the individual instruction don't need to output empty blocks and if they are sparsesafe. * * @return true if empty blocks can be discarded on map input */ public boolean allowsFilterEmptyInputBlocks() { boolean ret = true; int count = 0; if( ret && mapper_instructions!=null ) for( ArrayList<MRInstruction> vinst : mapper_instructions ) for( MRInstruction inst : vinst ){ ret &= (inst instanceof AggregateBinaryInstruction && !((AggregateBinaryInstruction)inst).getOutputEmptyBlocks() ) ||(inst instanceof PMMJMRInstruction && !((PMMJMRInstruction)inst).getOutputEmptyBlocks() ); count++; //ensure that mapper instructions exists } return ret && count>0; } public void configure(JobConf job) { super.configure(job); //get the indexes that this matrix file represents, //since one matrix file can occur multiple times in a statement try { representativeMatrixes=MRJobConfiguration.getInputMatrixIndexesInMapper(job); } catch (IOException e) { throw new RuntimeException(e); } //get input converter information inputConverter=MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0)); DataGenMRInstruction[] allDataGenIns; MRInstruction[] allMapperIns; ReblockInstruction[] allReblockIns; CSVReblockInstruction[] allCSVReblockIns; try { allDataGenIns = MRJobConfiguration.getDataGenInstructions(job); //parse the instructions on the matrices that this file represent allMapperIns=MRJobConfiguration.getInstructionsInMapper(job); //parse the reblock instructions on the matrices that this file represent allReblockIns=MRJobConfiguration.getReblockInstructions(job); allCSVReblockIns=MRJobConfiguration.getCSVReblockInstructions(job); } catch (DMLRuntimeException e) { throw new RuntimeException(e); } //get all the output indexes byte[] outputs=MRJobConfiguration.getOutputIndexesInMapper(job); //get the dimension of all the representative matrices rlens=new long[representativeMatrixes.size()]; clens=new long[representativeMatrixes.size()]; for(int i=0; i<representativeMatrixes.size(); i++) { rlens[i]=MRJobConfiguration.getNumRows(job, representativeMatrixes.get(i)); clens[i]=MRJobConfiguration.getNumColumns(job, representativeMatrixes.get(i)); // System.out.println("get dimension for "+representativeMatrixes.get(i)+": "+rlens[i]+", "+clens[i]); } //get the block sizes of the representative matrices brlens=new int[representativeMatrixes.size()]; bclens=new int[representativeMatrixes.size()]; for(int i=0; i<representativeMatrixes.size(); i++) { brlens[i]=MRJobConfiguration.getNumRowsPerBlock(job, representativeMatrixes.get(i)); bclens[i]=MRJobConfiguration.getNumColumnsPerBlock(job, representativeMatrixes.get(i)); // System.out.println("get blocksize for "+representativeMatrixes.get(i)+": "+brlens[i]+", "+bclens[i]); } rbounds=new long[representativeMatrixes.size()]; cbounds=new long[representativeMatrixes.size()]; lastblockrlens=new int[representativeMatrixes.size()]; lastblockclens=new int[representativeMatrixes.size()]; //calculate upper boundaries for key value pairs if(valueClass.equals(MatrixBlock.class)) { for(int i=0; i<representativeMatrixes.size(); i++) { rbounds[i]=(long)Math.ceil((double)rlens[i]/(double)brlens[i]); cbounds[i]=(long)Math.ceil((double)clens[i]/(double)bclens[i]); lastblockrlens[i]=(int) (rlens[i]%brlens[i]); lastblockclens[i]=(int) (clens[i]%bclens[i]); if(lastblockrlens[i]==0) lastblockrlens[i]=brlens[i]; if(lastblockclens[i]==0) lastblockclens[i]=bclens[i]; /* * what is this for???? // DRB: the row indexes need to be fixed rbounds[i] = rlens[i];*/ } }else { for(int i=0; i<representativeMatrixes.size(); i++) { rbounds[i]=rlens[i]; cbounds[i]=clens[i]; lastblockrlens[i]=1; lastblockclens[i]=1; // System.out.println("get bound for "+representativeMatrixes.get(i)+": "+rbounds[i]+", "+cbounds[i]); } } //load data from distributed cache (if required, reuse if jvm_reuse) try { setupDistCacheFiles(job); } catch(IOException ex) { throw new RuntimeException(ex); } //collect unary instructions for each representative matrix HashSet<Byte> set=new HashSet<Byte>(); for(int i=0; i<representativeMatrixes.size(); i++) { set.clear(); set.add(representativeMatrixes.get(i)); //collect the relavent datagen instructions for this representative matrix ArrayList<DataGenMRInstruction> dataGensForThisMatrix=new ArrayList<DataGenMRInstruction>(); if(allDataGenIns!=null) { for(DataGenMRInstruction ins:allDataGenIns) { if(set.contains(ins.getInput())) { dataGensForThisMatrix.add(ins); set.add(ins.output); } } } if(dataGensForThisMatrix.size()>1) throw new RuntimeException("only expects at most one rand instruction per input"); if(dataGensForThisMatrix.isEmpty()) dataGen_instructions.add(null); else dataGen_instructions.add(dataGensForThisMatrix.get(0)); //collect the relavent instructions for this representative matrix ArrayList<MRInstruction> opsForThisMatrix=new ArrayList<MRInstruction>(); if(allMapperIns!=null) { for(MRInstruction ins: allMapperIns) { try { /* boolean toAdd=true; for(byte input: ins.getInputIndexes()) if(!set.contains(input)) { toAdd=false; break; } */ boolean toAdd=false; for(byte input : ins.getInputIndexes()) if(set.contains(input)) { toAdd=true; break; } if(toAdd) { opsForThisMatrix.add(ins); set.add(ins.output); } } catch (DMLRuntimeException e) { throw new RuntimeException(e); } } } mapper_instructions.add(opsForThisMatrix); //collect the relavent reblock instructions for this representative matrix ArrayList<ReblockInstruction> reblocksForThisMatrix=new ArrayList<ReblockInstruction>(); if(allReblockIns!=null) { for(ReblockInstruction ins:allReblockIns) { if(set.contains(ins.input)) { reblocksForThisMatrix.add(ins); set.add(ins.output); } } } reblock_instructions.add(reblocksForThisMatrix); //collect the relavent reblock instructions for this representative matrix ArrayList<CSVReblockInstruction> csvReblocksForThisMatrix=new ArrayList<CSVReblockInstruction>(); if(allCSVReblockIns!=null) { for(CSVReblockInstruction ins:allCSVReblockIns) { if(set.contains(ins.input)) { csvReblocksForThisMatrix.add(ins); set.add(ins.output); } } } csv_reblock_instructions.add(csvReblocksForThisMatrix); //collect the output indexes for this representative matrix ArrayList<Byte> outsForThisMatrix=new ArrayList<Byte>(); for(byte output: outputs) { if(set.contains(output)) outsForThisMatrix.add(output); } outputIndexes.add(outsForThisMatrix); } } protected void processMapperInstructionsForMatrix(int index) throws IOException { //apply all mapper instructions try { processMixedInstructions(mapper_instructions.get(index)); } catch (Exception e) { throw new IOException(e); } } }