/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.spark;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.functionobjects.Multiply;
import org.apache.sysml.runtime.functionobjects.Plus;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.TripleIndexes;
import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
import org.apache.sysml.runtime.matrix.operators.Operator;
/**
 * Spark instruction for replication-based matrix multiplication (opcode "rmm").
 *
 * Every block (i,k) of the left-hand side is replicated once per column block j
 * of the right-hand side, and every block (k,j) of the right-hand side once per
 * row block i of the left-hand side. Both replicated datasets are keyed by the
 * triple (i,j,k), joined, multiplied block-wise, and finally summed per output
 * block (i,j).
 */
public class RmmSPInstruction extends BinarySPInstruction
{
	/**
	 * Creates an rmm instruction and tags it with the RMM Spark instruction type.
	 *
	 * @param op     operator handed to the super class (parseInstruction passes null)
	 * @param in1    left-hand-side matrix operand
	 * @param in2    right-hand-side matrix operand
	 * @param out    output matrix operand
	 * @param opcode instruction opcode
	 * @param istr   full instruction string
	 */
	public RmmSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand out, String opcode, String istr )
	{
		super(op, in1, in2, out, opcode, istr);
		_sptype = SPINSTRUCTION_TYPE.RMM;
	}

	/**
	 * Parses an instruction string into an {@link RmmSPInstruction}.
	 *
	 * @param str instruction string; part 0 is the opcode, parts 1-3 are the
	 *            two inputs and the output operand
	 * @return the parsed instruction
	 * @throws DMLRuntimeException if the opcode is not "rmm"
	 */
	public static RmmSPInstruction parseInstruction( String str )
		throws DMLRuntimeException
	{
		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
		String opcode = parts[0];

		if ( !"rmm".equals(opcode) ) {
			throw new DMLRuntimeException("RmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
		}

		CPOperand in1 = new CPOperand(parts[1]);
		CPOperand in2 = new CPOperand(parts[2]);
		CPOperand out = new CPOperand(parts[3]);
		return new RmmSPInstruction(null, in1, in2, out, opcode, str);
	}

	/**
	 * Builds the RDD pipeline for the replication-based matrix multiplication
	 * and registers the result (including its lineage to both inputs) under the
	 * output variable.
	 *
	 * @param ec execution context; cast to {@link SparkExecutionContext}
	 * @throws DMLRuntimeException if the replication dimensions or block sizes
	 *         of the inputs are unknown/invalid, or if a called helper fails
	 */
	@Override
	public void processInstruction(ExecutionContext ec)
		throws DMLRuntimeException
	{
		SparkExecutionContext sec = (SparkExecutionContext)ec;

		//get input characteristics and rdds
		MatrixCharacteristics mc1 = sec.getMatrixCharacteristics( input1.getName() );
		MatrixCharacteristics mc2 = sec.getMatrixCharacteristics( input2.getName() );
		JavaPairRDD<MatrixIndexes,MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable( input1.getName() );
		JavaPairRDD<MatrixIndexes,MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable( input2.getName() );

		//fail fast: with unknown sizes the replication factor would silently
		//become zero (empty result) or, for a zero block size, unbounded
		checkReplicationSizes("columns of rhs", mc2.getCols(), mc2.getColsPerBlock());
		checkReplicationSizes("rows of lhs", mc1.getRows(), mc1.getRowsPerBlock());

		//execute Spark RMM instruction
		//step 1: prepare join keys (w/ replication), i/j/k
		JavaPairRDD<TripleIndexes,MatrixBlock> tmp1 = in1.flatMapToPair(
			new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true));
		JavaPairRDD<TripleIndexes,MatrixBlock> tmp2 = in2.flatMapToPair(
			new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));

		//step 2: join prepared datasets, multiply, and aggregate
		JavaPairRDD<MatrixIndexes,MatrixBlock> out =
			tmp1.join( tmp2 )                        //join by i/j/k
			    .mapToPair( new RmmMultiplyFunction() ); //do matrix multiplication
		out = RDDAggregateUtils.sumByKeyStable(out, false); //aggregation per result block

		//put output rdd into symbol table and record lineage to both inputs
		updateBinaryMMOutputMatrixCharacteristics(sec, true);
		sec.setRDDHandleForVariable(output.getName(), out);
		sec.addLineageRDD(output.getName(), input1.getName());
		sec.addLineageRDD(output.getName(), input2.getName());
	}

	/**
	 * Validates the length and block size used to derive a replication factor.
	 *
	 * @param what description of the checked dimension, used in the error message
	 * @param len  dimension length; must not be negative
	 * @param blen block size of that dimension; must be positive
	 * @throws DMLRuntimeException if either value is out of range
	 */
	private static void checkReplicationSizes(String what, long len, long blen)
		throws DMLRuntimeException
	{
		if( len < 0 || blen <= 0 ) {
			throw new DMLRuntimeException("RmmSPInstruction: unknown or invalid size for "
				+ what + " (length=" + len + ", block size=" + blen + ").");
		}
	}

	/**
	 * Replicates each input block under a set of (i,j,k) keys so that the left
	 * and right operands can be joined per partial product.
	 */
	private static class RmmReplicateFunction implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, TripleIndexes, MatrixBlock>
	{
		private static final long serialVersionUID = 3577072668341033932L;

		//number of replicas per input block, i.e., ceil(len/blen)
		private final long _numBlocks;
		//true if the replicated input is the left-hand side
		private final boolean _left;

		/**
		 * @param len  length of the dimension of the OTHER operand that this
		 *             input is replicated across
		 * @param blen block size of that dimension
		 * @param left true for the left-hand side, false for the right-hand side
		 */
		public RmmReplicateFunction(long len, long blen, boolean left)
		{
			//computed once here instead of on every call (same formula as before)
			_numBlocks = (long) Math.ceil((double)len/blen);
			_left = left;
		}

		/**
		 * Emits one (i,j,k) keyed tuple per replica of the given block.
		 *
		 * @param arg0 input block with its (row,column) block index
		 * @return iterator over the replicated tuples
		 */
		@Override
		public Iterator<Tuple2<TripleIndexes, MatrixBlock>> call( Tuple2<MatrixIndexes, MatrixBlock> arg0 )
			throws Exception
		{
			LinkedList<Tuple2<TripleIndexes, MatrixBlock>> ret = new LinkedList<Tuple2<TripleIndexes, MatrixBlock>>();
			MatrixIndexes ixIn = arg0._1();
			MatrixBlock blkIn = arg0._2();

			//shallow replication: all replicas share the same block reference
			//instead of a deep copy per replica; the multiply step writes into a
			//freshly allocated result block
			//NOTE(review): relies on aggregateBinaryOperations treating both
			//operands as read-only -- confirm

			if( _left ) //LHS MATRIX
			{
				//replicate wrt # column blocks in RHS
				long i = ixIn.getRowIndex();
				long k = ixIn.getColumnIndex();
				for( long j=1; j<=_numBlocks; j++ ) {
					ret.add( new Tuple2<TripleIndexes, MatrixBlock>(new TripleIndexes(i, j, k), blkIn) );
				}
			}
			else // RHS MATRIX
			{
				//replicate wrt # row blocks in LHS
				long k = ixIn.getRowIndex();
				long j = ixIn.getColumnIndex();
				for( long i=1; i<=_numBlocks; i++ ) {
					ret.add( new Tuple2<TripleIndexes, MatrixBlock>(new TripleIndexes(i, j, k), blkIn) );
				}
			}

			//output list of new tuples
			return ret.iterator();
		}
	}

	/**
	 * Multiplies the two joined blocks of a key (i,j,k) and re-keys the partial
	 * product by its output block index (i,j).
	 */
	private static class RmmMultiplyFunction implements PairFunction<Tuple2<TripleIndexes, Tuple2<MatrixBlock,MatrixBlock>>, MatrixIndexes, MatrixBlock>
	{
		private static final long serialVersionUID = -5772410117511730911L;

		//sum-product operator: multiply cell pairs, aggregate with plus
		private final AggregateBinaryOperator _op;

		public RmmMultiplyFunction()
		{
			AggregateOperator agg = new AggregateOperator(0, Plus.getPlusFnObject());
			_op = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), agg);
		}

		/**
		 * @param arg0 key (i,j,k) with the pair (lhs block, rhs block)
		 * @return output index (i,j) with the partial product block
		 */
		@Override
		public Tuple2<MatrixIndexes, MatrixBlock> call( Tuple2<TripleIndexes, Tuple2<MatrixBlock,MatrixBlock>> arg0 )
			throws Exception
		{
			//get input blocks per key
			TripleIndexes ixIn = arg0._1(); //i,j,k
			MatrixIndexes ixOut = new MatrixIndexes(ixIn.getFirstIndex(), ixIn.getSecondIndex()); //i,j
			MatrixBlock blkIn1 = arg0._2()._1();
			MatrixBlock blkIn2 = arg0._2()._2();
			MatrixBlock blkOut = new MatrixBlock();

			//core block matrix multiplication
			blkIn1.aggregateBinaryOperations(blkIn1, blkIn2, blkOut, _op);

			//output new tuple
			return new Tuple2<MatrixIndexes, MatrixBlock>(ixOut, blkOut);
		}
	}
}