/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.spark;

import java.util.ArrayList;
import java.util.Iterator;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;

import scala.Tuple2;

import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.Multiply;
import org.apache.sysml.runtime.functionobjects.Plus;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.operators.BinaryOperator;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.UnaryOperator;
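/**
 * Spark instruction for the offset pass of cumulative aggregates
 * (cumsum, cumprod, cummin, cummax). It row-wise splits the per-block
 * offsets provided as the second input, joins them with the original data
 * blocks of the first input, merges each offset into the first row of its
 * block, and applies the corresponding columnwise cumulative operator per
 * block (see the opcode mapping in the constructor).
 */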
public class CumulativeOffsetSPInstruction extends BinarySPInstruction
{
private BinaryOperator _bop = null;
private UnaryOperator _uop = null;
private double _initValue = 0;
public CumulativeOffsetSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand out, double init, String opcode, String istr)
{
super(op, in1, in2, out, opcode, istr);
_sptype = SPINSTRUCTION_TYPE.CumsumOffset;
if( "bcumoffk+".equals(opcode) ) {
_bop = new BinaryOperator(Plus.getPlusFnObject());
_uop = new UnaryOperator(Builtin.getBuiltinFnObject("ucumk+"));
}
else if( "bcumoff*".equals(opcode) ){
_bop = new BinaryOperator(Multiply.getMultiplyFnObject());
_uop = new UnaryOperator(Builtin.getBuiltinFnObject("ucum*"));
}
else if( "bcumoffmin".equals(opcode) ){
_bop = new BinaryOperator(Builtin.getBuiltinFnObject("min"));
_uop = new UnaryOperator(Builtin.getBuiltinFnObject("ucummin"));
}
else if( "bcumoffmax".equals(opcode) ){
_bop = new BinaryOperator(Builtin.getBuiltinFnObject("max"));
_uop = new UnaryOperator(Builtin.getBuiltinFnObject("ucummax"));
}
_initValue = init;
}
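	//expected instruction format: opcode (bcumoffk+, bcumoff*, bcumoffmin, bcumoffmax),
	//data input, offset/aggregate input, output, initial offset value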
public static CumulativeOffsetSPInstruction parseInstruction ( String str )
throws DMLRuntimeException
{
String[] parts = InstructionUtils.getInstructionPartsWithValueType( str );
InstructionUtils.checkNumFields ( parts, 4 );
String opcode = parts[0];
CPOperand in1 = new CPOperand(parts[1]);
CPOperand in2 = new CPOperand(parts[2]);
CPOperand out = new CPOperand(parts[3]);
double init = Double.parseDouble(parts[4]);
return new CumulativeOffsetSPInstruction(null, in1, in2, out, init, opcode, str);
}
@Override
public void processInstruction(ExecutionContext ec)
throws DMLRuntimeException
{
SparkExecutionContext sec = (SparkExecutionContext)ec;
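		//get characteristics of the offset/aggregate input (used for row-wise offset splitting)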
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input2.getName());
long rlen = mc.getRows();
int brlen = mc.getRowsPerBlock();
//get inputs
JavaPairRDD<MatrixIndexes,MatrixBlock> inData = sec.getBinaryBlockRDDHandleForVariable( input1.getName() );
JavaPairRDD<MatrixIndexes,MatrixBlock> inAgg = sec.getBinaryBlockRDDHandleForVariable( input2.getName() );
//prepare aggregates (cumsplit of offsets)
inAgg = inAgg.flatMapToPair(new RDDCumSplitFunction(_initValue, rlen, brlen));
//execute cumulative offset (apply cumulative op w/ offsets)
JavaPairRDD<MatrixIndexes,MatrixBlock> out =
inData.join( inAgg )
.mapValues(new RDDCumOffsetFunction(_uop, _bop));
updateUnaryOutputMatrixCharacteristics(sec);
//put output handle in symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageRDD(output.getName(), input2.getName());
}
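	/**
	 * Expands each block of the per-block aggregates into individual 1-row offset
	 * blocks, keyed by the row-block index of the data block that consumes them
	 * (i.e., shifted down by one: the aggregate of block k becomes the offset of
	 * block k+1). The first row block gets an offset row initialized with the
	 * configured init value, and the aggregate of the last block is dropped
	 * because no subsequent block exists.
	 */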
private static class RDDCumSplitFunction implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, MatrixIndexes, MatrixBlock>
{
private static final long serialVersionUID = -8407407527406576965L;
private double _initValue = 0;
private int _brlen = -1;
private long _lastRowBlockIndex;
public RDDCumSplitFunction( double initValue, long rlen, int brlen )
{
_initValue = initValue;
_brlen = brlen;
_lastRowBlockIndex = (long)Math.ceil((double)rlen/brlen);
}
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call( Tuple2<MatrixIndexes, MatrixBlock> arg0 )
throws Exception
{
ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
MatrixIndexes ixIn = arg0._1();
MatrixBlock blkIn = arg0._2();
long rixOffset = (ixIn.getRowIndex()-1)*_brlen;
boolean firstBlk = (ixIn.getRowIndex() == 1);
boolean lastBlk = (ixIn.getRowIndex() == _lastRowBlockIndex );
//introduce offsets w/ init value for first row
if( firstBlk ) {
MatrixIndexes tmpix = new MatrixIndexes(1, ixIn.getColumnIndex());
MatrixBlock tmpblk = new MatrixBlock(1, blkIn.getNumColumns(), blkIn.isInSparseFormat());
if( _initValue != 0 ){
for( int j=0; j<blkIn.getNumColumns(); j++ )
tmpblk.appendValue(0, j, _initValue);
}
ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(tmpix, tmpblk));
}
//output splitting (shift by one), preaggregated offset used by subsequent block
for( int i=0; i<blkIn.getNumRows(); i++ )
				if( !(lastBlk && i==(blkIn.getNumRows()-1)) ) //skip last row of the last block (no subsequent block consumes its offset)
{
MatrixIndexes tmpix = new MatrixIndexes(rixOffset+i+2, ixIn.getColumnIndex());
MatrixBlock tmpblk = new MatrixBlock(1, blkIn.getNumColumns(), blkIn.isInSparseFormat());
blkIn.sliceOperations(i, i, 0, blkIn.getNumColumns()-1, tmpblk);
ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(tmpix, tmpblk));
}
return ret.iterator();
}
}
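	/**
	 * Applies the cumulative operation within a single data block: the joined
	 * 1-row offset is merged into the block's first row via the binary operator
	 * (+, *, min, max), and the unary cumulative operator then computes the
	 * columnwise prefix aggregates over the full block.
	 */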
private static class RDDCumOffsetFunction implements Function<Tuple2<MatrixBlock, MatrixBlock>, MatrixBlock>
{
private static final long serialVersionUID = -5804080263258064743L;
private UnaryOperator _uop = null;
private BinaryOperator _bop = null;
public RDDCumOffsetFunction(UnaryOperator uop, BinaryOperator bop)
{
_uop = uop;
_bop = bop;
}
@Override
public MatrixBlock call(Tuple2<MatrixBlock, MatrixBlock> arg0)
throws Exception
{
//prepare inputs and outputs
MatrixBlock dblkIn = arg0._1(); //original data
MatrixBlock oblkIn = arg0._2(); //offset row vector
MatrixBlock blkOut = new MatrixBlock(dblkIn.getNumRows(), dblkIn.getNumColumns(), dblkIn.isInSparseFormat());
			//blockwise offset aggregation and prefix aggregate computation
			MatrixBlock data2 = new MatrixBlock(dblkIn); //deep copy of input data block
			MatrixBlock fdata2 = data2.sliceOperations(0, 0, 0, data2.getNumColumns()-1, new MatrixBlock()); //slice out first row (0-based)
			fdata2.binaryOperationsInPlace(_bop, oblkIn); //merge offset into first row
			data2.copy(0, 0, 0, data2.getNumColumns()-1, fdata2, true); //copy modified first row back (0-based)
			data2.unaryOperations(_uop, blkOut); //compute columnwise prefix sums/products/mins/maxs
return blkOut;
}
}
}