/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.matrix;

import java.util.HashSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.instructions.MRInstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
import org.apache.sysml.runtime.matrix.mapred.MMCJMRMapper;
import org.apache.sysml.runtime.matrix.mapred.MMCJMRReducerWithAggregator;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups;
import org.apache.sysml.yarn.DMLAppMasterUtils;
import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
/*
 * inBlockRepresentation: indicates whether to use block representation or cell representation
 * inputs: input matrices; the inputs are indexed by 0, 1, 2, ... based on their position in this string
 * inputInfos: the input format information for the input matrices
 * rlen: the number of rows for each matrix
 * clen: the number of columns for each matrix
 * brlen: the number of rows per block
 * bclen: the number of columns per block
 * instructionsInMapper: in Mapper, the set of unary operations that need to be performed on each input matrix
 * aggInstructionsInReducer: in Reducer, right after sorting, the set of aggregate operations that need
 *                           to be performed on each input matrix
 * aggBinInstrction: the aggregate binary instruction for the MMCJ operation
 * numReducers: the number of reducers
 * replication: the replication factor for the output
 * output: the path for the output file
 * outputInfo: information about the output format
 */
public class MMCJMR
{
	private static final boolean AUTOMATIC_CONFIG_NUM_REDUCERS = true;
	private static final Log LOG = LogFactory.getLog(MMCJMR.class);

	private MMCJMR() {
		//prevent instantiation via private constructor
	}

	public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
			long[] rlens, long[] clens, int[] brlens, int[] bclens,
			String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction,
			int numReducers, int replication, String output, OutputInfo outputinfo)
		throws Exception
	{
		JobConf job = new JobConf(MMCJMR.class);

		// TODO: check w/ yuanyuan. This job always runs in blocked mode, and hence derivation is not necessary.
		boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);

		// by default, assume that dimensions of MMCJ's output are known at compile time
		byte resultDimsUnknown = (byte) 0;
		MatrixCharacteristics[] stats = commonSetup(job, inBlockRepresentation, inputs, inputInfos,
			rlens, clens, brlens, bclens, instructionsInMapper, aggInstructionsInReducer,
			aggBinInstrction, numReducers, replication, resultDimsUnknown, output, outputinfo);

		// Print the complete instruction
		if( LOG.isTraceEnabled() )
			inst.printCompleteMRJobInstruction(stats);

		// Update resultDimsUnknown based on computed "stats"
		// There is always a single output
		if( stats[0].getRows() == -1 || stats[0].getCols() == -1 ) {
			resultDimsUnknown = (byte) 1;

			// if the dimensions are unknown, then the setup done in commonSetup() must be updated
			byte[] resultIndexes = new byte[]{ MRInstructionParser.parseSingleInstruction(aggBinInstrction).output };
			byte[] resultDimsUnknown_Array = new byte[]{ resultDimsUnknown };

			//set up the multiple output files, and their format information
			MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown_Array,
				new String[]{output}, new OutputInfo[]{outputinfo}, inBlockRepresentation);
		}

		AggregateBinaryInstruction ins = (AggregateBinaryInstruction) MRInstructionParser.parseSingleInstruction(aggBinInstrction);
		MatrixCharacteristics dim1 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input1);
		MatrixCharacteristics dim2 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input2);

		if( dim1.getRowsPerBlock() > dim1.getRows() )
			dim1.setRowsPerBlock( (int) dim1.getRows() );
		if( dim1.getColsPerBlock() > dim1.getCols() )
			dim1.setColsPerBlock( (int) dim1.getCols() );
		if( dim2.getRowsPerBlock() > dim2.getRows() )
			dim2.setRowsPerBlock( (int) dim2.getRows() );
		if( dim2.getColsPerBlock() > dim2.getCols() )
			dim2.setColsPerBlock( (int) dim2.getCols() );

		long blockSize1 = 77 + 8 * dim1.getRowsPerBlock() * dim1.getColsPerBlock();
		long blockSize2 = 77 + 8 * dim2.getRowsPerBlock() * dim2.getColsPerBlock();
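		// Illustrative arithmetic for the block-size estimate above (the 1000 x 1000 block
		// dimensions are hypothetical, not from this code): a dense block of doubles is
		// estimated at 77 + 8 * 1000 * 1000 = 8,000,077 bytes (about 8 MB), which reads as
		// a constant per-block serialization overhead of 77 bytes plus 8 bytes per cell.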
		long blockSizeResult = 77 + 8 * dim1.getRowsPerBlock() * dim2.getColsPerBlock();

		long cacheSize = -1;
		//cache the first result
		if( dim1.getRows() < dim2.getCols() ) {
			long numBlocks = (long) Math.ceil( (double)dim1.getRows() / (double)dim1.getRowsPerBlock() );
			cacheSize = numBlocks * (20 + blockSize1) + 32;
		}
		else //cache the second result
		{
			long numBlocks = (long) Math.ceil( (double)dim2.getCols() / (double)dim2.getColsPerBlock() );
			cacheSize = numBlocks * (20 + blockSize2) + 32;
		}

		//add known memory consumption (will be subtracted from output buffer)
		cacheSize += 2 * Math.max(blockSize1, blockSize2) //the cached key-value pair (plus input instance)
			+ blockSizeResult //the cached single result
			+ MRJobConfiguration.getMiscMemRequired(job); //misc memory requirement by hadoop
		MRJobConfiguration.setMMCJCacheSize(job, (int)cacheSize);
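		// Worked example of the cache-size heuristic above (all numbers hypothetical): with
		// 1000 x 1000 blocks (blockSize1 of about 8 MB) and a first input of 100,000 rows
		// forming the smaller side, ceil(100000/1000) = 100 blocks are cached, giving a base
		// cacheSize of about 100 * (20 + 8 MB), i.e. roughly 800 MB, before adding the cached
		// key-value pair, the single result block, and Hadoop's miscellaneous requirement.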
		//set unique working dir
		MRJobConfiguration.setUniqueWorkingDir(job);

		//run mmcj job
		RunningJob runjob = JobClient.runJob(job);

		/* Process different counters */

		// NOTE: MMCJ job always has only a single output.
		// Hence, no need to scan resultIndexes[] like other jobs
		int outputIndex = 0;
		Byte outputMatrixID = MRInstructionParser.parseSingleInstruction(aggBinInstrction).output;

		Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);

		// number of non-zeros
		stats[outputIndex].setNonZeros( group.getCounter(Byte.toString(outputMatrixID)) );

		return new JobReturn(stats[outputIndex], outputinfo, runjob.isSuccessful());
	}

	private static MatrixCharacteristics[] commonSetup(JobConf job, boolean inBlockRepresentation,
			String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens,
			String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction,
			int numReducers, int replication, byte resultDimsUnknown, String output, OutputInfo outputinfo)
		throws Exception
	{
		job.setJobName("MMCJ-MR");

		if( numReducers <= 0 )
			throw new Exception("MMCJ-MR has to have at least one reduce task!");

		//whether to use block representation or cell representation
		MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);

		byte[] realIndexes = new byte[inputs.length];
		for( byte b=0; b<realIndexes.length; b++ )
			realIndexes[b] = b;

		//set up the input files and their format information
		MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true,
			inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);

		//set up the dimensions of input matrices
		MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);

		//set up the block size
		MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

		//set up the unary instructions that will be performed in the mapper
		MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

		//set up the aggregate instructions that will happen in the combiner and reducer
		MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);

		//set up the aggregate binary operation for the mmcj job
		MRJobConfiguration.setAggregateBinaryInstructions(job, aggBinInstrction);

		//set up the replication factor for the results
		job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

		//set up preferred custom serialization framework for binary block format
		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
			MRJobConfiguration.addBinaryBlockSerializationFramework( job );

		//set up map/reduce memory configurations (if in AM context)
		DMLConfig config = ConfigurationManager.getDMLConfig();
		DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);

		//set up custom map/reduce configurations
		MRJobConfiguration.setupCustomMRConfigurations(job, config);

		byte[] resultIndexes = new byte[]{ MRInstructionParser.parseSingleInstruction(aggBinInstrction).output };
		byte[] resultDimsUnknown_Array = new byte[]{ resultDimsUnknown };
		// byte[] resultIndexes=new byte[]{AggregateBinaryInstruction.parseMRInstruction(aggBinInstrction).output};

		//set up which matrices need to be passed from the mapper to the reducer
		HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
			instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, resultIndexes);

		//set up the multiple output files, and their format information
		MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown_Array,
			new String[]{output}, new OutputInfo[]{outputinfo}, inBlockRepresentation);

		// configure mapper
		job.setMapperClass(MMCJMRMapper.class);
		job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
		if( inBlockRepresentation )
			job.setMapOutputValueClass(MatrixBlock.class);
		else
			job.setMapOutputValueClass(MatrixCell.class);
		job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
		job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexPartitioner.class);

		//configure combiner
		//TODO: cannot set up a combiner, because it would destroy the stable numerical algorithms
		//for sum or for central moments
		//if( aggInstructionsInReducer!=null && !aggInstructionsInReducer.isEmpty() )
		//	job.setCombinerClass(MMCJMRCombiner.class);

		MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
			instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, null,
			resultIndexes, mapoutputIndexes, true);

		//set up the number of reducers
		if( AUTOMATIC_CONFIG_NUM_REDUCERS ) {
			int numRed = determineNumReducers(rlens, clens, numReducers, ret.numReducerGroups);
			job.setNumReduceTasks(numRed);
		}
		else
			MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

		//configure reducer
		// note: the alternative MMCJMRReducer is not maintained
		job.setReducerClass(MMCJMRReducerWithAggregator.class);

		return ret.stats;
	}
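	// Illustrative sizing for determineNumReducers below (hypothetical cluster numbers):
	// given a 128 MB HDFS block size, a largest input of about 6.4 GB on disk, a configured
	// default of 10 reducers, a cluster maximum of 40 parallel reduce tasks, and 20 reducer
	// groups, the heuristic computes max(10, min(6400/128, 40)) = 40 and then min(40, 20) = 20,
	// so the job is configured with 20 reduce tasks.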
	/**
	 * Determine the number of reducers based on the configured number of reducers, the number
	 * of result groups, and the input data size divided by the blocksize (as a heuristic for
	 * a useful degree of parallelism).
	 *
	 * @param rlen array of numbers of rows
	 * @param clen array of numbers of columns
	 * @param defaultNumRed default number of reducers
	 * @param numRedGroups number of reducer groups
	 * @return number of reducers
	 */
	protected static int determineNumReducers( long[] rlen, long[] clen, int defaultNumRed, long numRedGroups )
	{
		//init return with default value
		int ret = defaultNumRed;

		//determine max output matrix size
		long maxNumRed = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
		long blockSize = InfrastructureAnalyzer.getHDFSBlockSize() / (1024*1024);
		long maxSize = -1; //in MB
		for( int i=0; i<rlen.length; i++ ) {
			long tmp = MatrixBlock.estimateSizeOnDisk(rlen[i], clen[i], rlen[i]*clen[i]) / (1024*1024);
			maxSize = Math.max(maxSize, tmp);
		}

		//correct the max number of reducers on yarn clusters
		if( InfrastructureAnalyzer.isYarnEnabled() )
			maxNumRed = Math.max( maxNumRed, YarnClusterAnalyzer.getNumCores()/2 );

		//increase num reducers wrt input size / hdfs blocksize (up to max reducers)
		//as a heuristic we allow an increase up to 2x the configured default, now disabled
		//maxNumRed = Math.min(2 * defaultNumRed, maxNumRed);
		ret = (int) Math.max(ret, Math.min(maxSize/blockSize, maxNumRed));

		//reduce num reducers for few result blocks
		ret = (int) Math.min(ret, numRedGroups);

		//ensure there is at least one reducer
		ret = Math.max(ret, 1);

		return ret;
	}
}