/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
 */
package com.ibm.bi.dml.runtime.matrix.data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import com.ibm.bi.dml.lops.PartialAggregate.CorrectionLocationType;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.functionobjects.Builtin;
import com.ibm.bi.dml.runtime.functionobjects.Builtin.BuiltinFunctionCode;
import com.ibm.bi.dml.runtime.functionobjects.IndexFunction;
import com.ibm.bi.dml.runtime.functionobjects.KahanFunction;
import com.ibm.bi.dml.runtime.functionobjects.KahanPlus;
import com.ibm.bi.dml.runtime.functionobjects.KahanPlusSq;
import com.ibm.bi.dml.runtime.functionobjects.Mean;
import com.ibm.bi.dml.runtime.functionobjects.Multiply;
import com.ibm.bi.dml.runtime.functionobjects.ReduceAll;
import com.ibm.bi.dml.runtime.functionobjects.ReduceCol;
import com.ibm.bi.dml.runtime.functionobjects.ReduceDiag;
import com.ibm.bi.dml.runtime.functionobjects.ReduceRow;
import com.ibm.bi.dml.runtime.functionobjects.ValueFunction;
import com.ibm.bi.dml.runtime.instructions.cp.KahanObject;
import com.ibm.bi.dml.runtime.matrix.operators.AggregateOperator;
import com.ibm.bi.dml.runtime.matrix.operators.AggregateUnaryOperator;
import com.ibm.bi.dml.runtime.matrix.operators.UnaryOperator;
import com.ibm.bi.dml.runtime.util.UtilFunctions;
/**
* MB:
* Library for matrix aggregations including ak+, uak+ for all
* combinations of dense and sparse representations, and corrections.
* Those are performance-critical operations because they are used
* on combiners/reducers of important operations like tsmm, mvmult,
* indexing, but also basic sum/min/max/mean, row*, col*, etc. Specific
* handling is especially required for all non sparse-safe operations
* in order to prevent unnecessary worse asymptotic behavior.
*
* This library currently covers the following opcodes:
* ak+, uak+, uark+, uack+, uamin, uarmin, uacmin, uamax, uarmax, uacmax,
* ua*, uamean, uarmean, uacmean, uarimax, uaktrace.
* cumk+, cummin, cummax, cum*, tak+
*
* TODO next opcode extensions: a+, colindexmax
*/
public class LibMatrixAgg
{
//internal configuration parameters
private static final boolean NAN_AWARENESS = false;
private static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements
private static final long PAR_INTERMEDIATE_SIZE_THRESHOLD = 2*1024*1024; //Max 2MB
////////////////////////////////
// public matrix agg interface
////////////////////////////////
private enum AggType {
KAHAN_SUM,
KAHAN_SUM_SQ,
CUM_KAHAN_SUM,
CUM_MIN,
CUM_MAX,
CUM_PROD,
MIN,
MAX,
MEAN,
MAX_INDEX,
MIN_INDEX,
PROD,
INVALID,
}
private LibMatrixAgg() {
//prevent instantiation via private constructor
}
/**
* Core incremental matrix aggregate (ak+) as used in mapmult, tsmm,
* cpmm, etc. Note that we try to keep the current
* aggVal and aggCorr in dense format in order to allow efficient
* access according to the dense/sparse input.
*
*
* @param in input matrix
* @param aggVal current aggregate values (in/out)
* @param aggCorr current aggregate correction (in/out)
* @throws DMLRuntimeException
*/
public static void aggregateBinaryMatrix(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr)
throws DMLRuntimeException
{
//Timing time = new Timing(true);
//boolean saggVal = aggVal.isInSparseFormat(), saggCorr = aggCorr.isInSparseFormat();
//long naggVal = aggVal.getNonZeros(), naggCorr = aggCorr.getNonZeros();
//core aggregation
if(!in.sparse && !aggVal.sparse && !aggCorr.sparse)
aggregateBinaryMatrixAllDense(in, aggVal, aggCorr);
else if(in.sparse && !aggVal.sparse && !aggCorr.sparse)
aggregateBinaryMatrixSparseDense(in, aggVal, aggCorr);
else if(in.sparse ) //any aggVal, aggCorr
aggregateBinaryMatrixSparseGeneric(in, aggVal, aggCorr);
else //if( !in.sparse ) //any aggVal, aggCorr
aggregateBinaryMatrixDenseGeneric(in, aggVal, aggCorr);
//System.out.println("agg ("+in.rlen+","+in.clen+","+in.getNonZeros()+","+in.sparse+"), " +
// "("+naggVal+","+saggVal+"), ("+naggCorr+","+saggCorr+") -> " +
// "("+aggVal.getNonZeros()+","+aggVal.isInSparseFormat()+"), ("+aggCorr.getNonZeros()+","+aggCorr.isInSparseFormat()+") " +
// "in "+time.stop()+"ms.");
}
/**
* Core incremental matrix aggregate (ak+) as used for uack+ and acrk+.
* Embedded correction values.
*
* @param in
* @param aggVal
* @throws DMLRuntimeException
*/
public static void aggregateBinaryMatrix(MatrixBlock in, MatrixBlock aggVal, AggregateOperator aop)
throws DMLRuntimeException
{
//sanity check matching dimensions
if( in.getNumRows()!=aggVal.getNumRows() || in.getNumColumns()!=aggVal.getNumColumns() )
throw new DMLRuntimeException("Dimension mismatch on aggregate: "+in.getNumRows()+"x"+in.getNumColumns()+
" vs "+aggVal.getNumRows()+"x"+aggVal.getNumColumns());
//Timing time = new Timing(true);
//core aggregation
boolean lastRowCorr = (aop.correctionLocation == CorrectionLocationType.LASTROW);
boolean lastColCorr = (aop.correctionLocation == CorrectionLocationType.LASTCOLUMN);
if( !in.sparse && lastRowCorr )
aggregateBinaryMatrixLastRowDenseGeneric(in, aggVal);
else if( in.sparse && lastRowCorr )
aggregateBinaryMatrixLastRowSparseGeneric(in, aggVal);
else if( !in.sparse && lastColCorr )
aggregateBinaryMatrixLastColDenseGeneric(in, aggVal);
else //if( in.sparse && lastColCorr )
aggregateBinaryMatrixLastColSparseGeneric(in, aggVal);
//System.out.println("agg ("+in.rlen+","+in.clen+","+in.getNonZeros()+","+in.sparse+"), ("+naggVal+","+saggVal+") -> " +
// "("+aggVal.getNonZeros()+","+aggVal.isInSparseFormat()+") in "+time.stop()+"ms.");
}
/**
*
* @param in
* @param out
* @param vFn
* @param ixFn
* @throws DMLRuntimeException
*/
public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock out, AggregateUnaryOperator uaop)
throws DMLRuntimeException
{
//prepare meta data
AggType aggtype = getAggType(uaop);
final int m = in.rlen;
final int m2 = out.rlen;
final int n2 = out.clen;
//filter empty input blocks (incl special handling for sparse-unsafe operations)
if( in.isEmptyBlock(false) ){
aggregateUnaryMatrixEmpty(in, out, aggtype, uaop.indexFn);
return;
}
//Timing time = new Timing(true);
//allocate output arrays (if required)
out.reset(m2, n2, false); //always dense
out.allocateDenseBlock();
if( !in.sparse )
aggregateUnaryMatrixDense(in, out, aggtype, uaop.aggOp.increOp.fn, uaop.indexFn, 0, m);
else
aggregateUnaryMatrixSparse(in, out, aggtype, uaop.aggOp.increOp.fn, uaop.indexFn, 0, m);
//cleanup output and change representation (if necessary)
out.recomputeNonZeros();
out.examSparsity();
//System.out.println("uagg ("+in.rlen+","+in.clen+","+in.sparse+") in "+time.stop()+"ms.");
}
/**
*
* @param in
* @param out
* @param uaop
* @param k
* @throws DMLRuntimeException
*/
public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock out, AggregateUnaryOperator uaop, int k)
throws DMLRuntimeException
{
//fall back to sequential version if necessary
if( k <= 1 || (long)in.rlen*in.clen < PAR_NUMCELL_THRESHOLD || in.rlen <= k
|| (!(uaop.indexFn instanceof ReduceCol) && out.clen*8*k > PAR_INTERMEDIATE_SIZE_THRESHOLD ) ) {
aggregateUnaryMatrix(in, out, uaop);
return;
}
//prepare meta data
AggType aggtype = getAggType(uaop);
final int m = in.rlen;
final int m2 = out.rlen;
final int n2 = out.clen;
//filter empty input blocks (incl special handling for sparse-unsafe operations)
if( in.isEmptyBlock(false) ){
aggregateUnaryMatrixEmpty(in, out, aggtype, uaop.indexFn);
return;
}
//Timing time = new Timing(true);
//allocate output arrays (if required)
if( uaop.indexFn instanceof ReduceCol ) {
out.reset(m2, n2, false); //always dense
out.allocateDenseBlock();
}
//core multi-threaded unary aggregate computation
//(currently: always parallelization over number of rows)
try {
ExecutorService pool = Executors.newFixedThreadPool( k );
ArrayList<AggTask> tasks = new ArrayList<AggTask>();
int blklen = (int)(Math.ceil((double)m/k));
for( int i=0; i<k & i*blklen<m; i++ ) {
tasks.add( (uaop.indexFn instanceof ReduceCol) ?
new RowAggTask(in, out, aggtype, uaop, i*blklen, Math.min((i+1)*blklen, m)) :
new PartialAggTask(in, out, aggtype, uaop, i*blklen, Math.min((i+1)*blklen, m)) );
}
pool.invokeAll(tasks);
pool.shutdown();
//aggregate partial results
if( !(uaop.indexFn instanceof ReduceCol) ) {
out.copy(((PartialAggTask)tasks.get(0)).getResult()); //for init
for( int i=1; i<tasks.size(); i++ )
aggregateFinalResult(uaop.aggOp, out, ((PartialAggTask)tasks.get(i)).getResult());
}
}
catch(Exception ex) {
throw new DMLRuntimeException(ex);
}
//cleanup output and change representation (if necessary)
out.recomputeNonZeros();
out.examSparsity();
//System.out.println("uagg k="+k+" ("+in.rlen+","+in.clen+","+in.sparse+") in "+time.stop()+"ms.");
}
/**
*
* @param in
* @param out
* @param uop
* @throws DMLRuntimeException
*/
public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock out, UnaryOperator uop)
throws DMLRuntimeException
{
//prepare meta data
AggType aggtype = getAggType(uop);
final int m = in.rlen;
final int m2 = out.rlen;
final int n2 = out.clen;
//filter empty input blocks (incl special handling for sparse-unsafe operations)
if( in.isEmptyBlock(false) ){
aggregateUnaryMatrixEmpty(in, out, aggtype, null);
return;
}
//allocate output arrays (if required)
out.reset(m2, n2, false); //always dense
out.allocateDenseBlock();
//Timing time = new Timing(true);
if( !in.sparse )
aggregateUnaryMatrixDense(in, out, aggtype, uop.fn, null, 0, m);
else
aggregateUnaryMatrixSparse(in, out, aggtype, uop.fn, null, 0, m);
//cleanup output and change representation (if necessary)
out.recomputeNonZeros();
out.examSparsity();
//System.out.println("uop ("+in.rlen+","+in.clen+","+in.sparse+") in "+time.stop()+"ms.");
}
/**
*
* @param in1
* @param in2
* @param in3
* @param ret
* @return
*/
public static double aggregateTernary(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3)
{
//early abort if any block is empty
if( in1.isEmptyBlock(false) || in2.isEmptyBlock(false) || in3!=null&&in3.isEmptyBlock(false) ) {
return 0;
}
//Timing time = new Timing(true);
double val = -1;
if( !in1.sparse && !in2.sparse && (in3==null||!in3.sparse) ) //DENSE
val = aggregateTernaryDense(in1, in2, in3, 0, in1.rlen);
else //GENERAL CASE
val = aggregateTernaryGeneric(in1, in2, in3, 0, in1.rlen);
//System.out.println("tak+ ("+in1.rlen+","+in1.sparse+","+in2.sparse+","+in3.sparse+") in "+time.stop()+"ms.");
return val;
}
/**
* @throws DMLRuntimeException
*
*/
public static double aggregateTernary(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, int k)
throws DMLRuntimeException
{
//fall back to sequential version if necessary
if( k <= 1 || in1.rlen/3 < PAR_NUMCELL_THRESHOLD ) {
return aggregateTernary(in1, in2, in3);
}
//early abort if any block is empty
if( in1.isEmptyBlock(false) || in2.isEmptyBlock(false) || in3!=null&&in3.isEmptyBlock(false) ) {
return 0;
}
//Timing time = new Timing(true);
double val = -1;
try {
ExecutorService pool = Executors.newFixedThreadPool( k );
ArrayList<AggTernaryTask> tasks = new ArrayList<AggTernaryTask>();
int blklen = (int)(Math.ceil((double)in1.rlen/k));
for( int i=0; i<k & i*blklen<in1.rlen; i++ )
tasks.add( new AggTernaryTask(in1, in2, in3, i*blklen, Math.min((i+1)*blklen, in1.rlen)));
pool.invokeAll(tasks);
pool.shutdown();
//aggregate partial results
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
for( AggTernaryTask task : tasks )
kplus.execute2(kbuff, task.getResult());
val = kbuff._sum;
}
catch(Exception ex) {
throw new DMLRuntimeException(ex);
}
//System.out.println("tak+ k="+k+" ("+in1.rlen+","+in1.sparse+","+in2.sparse+","+in3.sparse+") in "+time.stop()+"ms.");
return val;
}
/**
*
* @param op
* @return
*/
public static boolean isSupportedUnaryAggregateOperator( AggregateUnaryOperator op )
{
AggType type = getAggType( op );
return (type != AggType.INVALID);
}
public static boolean isSupportedUnaryOperator( UnaryOperator op )
{
AggType type = getAggType( op );
return (type != AggType.INVALID);
}
/**
* Recompute outputs (e.g., maxindex or minindex) according to block indexes from MR.
* TODO: this should not be part of block operations but of the MR instruction.
*
* @param out
* @param op
* @param brlen
* @param bclen
* @param ix
*/
public static void recomputeIndexes( MatrixBlock out, AggregateUnaryOperator op, int brlen, int bclen, MatrixIndexes ix )
{
AggType type = getAggType(op);
if( (type == AggType.MAX_INDEX || type == AggType.MIN_INDEX) && ix.getColumnIndex()!=1 ) //MAXINDEX or MININDEX
{
int m = out.rlen;
double[] c = out.getDenseArray();
for( int i=0, cix=0; i<m; i++, cix+=2 )
c[cix] = UtilFunctions.cellIndexCalculation(ix.getColumnIndex(), bclen, (int)c[cix]-1);
}
}
/**
*
* @param op
* @return
*/
private static AggType getAggType( AggregateUnaryOperator op )
{
ValueFunction vfn = op.aggOp.increOp.fn;
IndexFunction ifn = op.indexFn;
//(kahan) sum / sum squared / trace (for ReduceDiag)
if( vfn instanceof KahanFunction
&& (op.aggOp.correctionLocation == CorrectionLocationType.LASTCOLUMN || op.aggOp.correctionLocation == CorrectionLocationType.LASTROW)
&& (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow || ifn instanceof ReduceDiag) )
{
if (vfn instanceof KahanPlus)
return AggType.KAHAN_SUM;
else if (vfn instanceof KahanPlusSq)
return AggType.KAHAN_SUM_SQ;
}
//mean
if( vfn instanceof Mean
&& (op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOROWS)
&& (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow) )
{
return AggType.MEAN;
}
//prod
if( vfn instanceof Multiply && ifn instanceof ReduceAll )
{
return AggType.PROD;
}
//min / max
if( vfn instanceof Builtin &&
(ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow) )
{
BuiltinFunctionCode bfcode = ((Builtin)vfn).bFunc;
switch( bfcode ){
case MAX: return AggType.MAX;
case MIN: return AggType.MIN;
case MAXINDEX: return AggType.MAX_INDEX;
case MININDEX: return AggType.MIN_INDEX;
default: //do nothing
}
}
return AggType.INVALID;
}
/**
*
* @param op
* @return
*/
private static AggType getAggType( UnaryOperator op )
{
ValueFunction vfn = op.fn;
//cumsum/cumprod/cummin/cummax
if( vfn instanceof Builtin ) {
BuiltinFunctionCode bfunc = ((Builtin) vfn).bFunc;
switch( bfunc )
{
case CUMSUM: return AggType.CUM_KAHAN_SUM;
case CUMPROD: return AggType.CUM_PROD;
case CUMMIN: return AggType.CUM_MIN;
case CUMMAX: return AggType.CUM_MAX;
default: return AggType.INVALID;
}
}
return AggType.INVALID;
}
/**
*
* @param aop
* @return
* @throws DMLRuntimeException
* @throws DMLUnsupportedOperationException
*/
private static void aggregateFinalResult( AggregateOperator aop, MatrixBlock out, MatrixBlock partout )
throws DMLUnsupportedOperationException, DMLRuntimeException
{
AggregateOperator laop = aop;
//special handling for mean where the final aggregate operator (kahan plus)
//is not equals to the partial aggregate operator
if( aop.increOp.fn instanceof Mean ) {
laop = new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), aop.correctionExists, aop.correctionLocation);
}
//incremental aggregation of final results
if( laop.correctionExists )
out.incrementalAggregate(laop, partout);
else
out.binaryOperationsInPlace(laop.increOp, partout);
}
/**
*
* @param in1
* @param in2
* @param in3
* @param rl
* @param ru
* @return
*/
private static double aggregateTernaryDense(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, int rl, int ru)
{
//compute block operations
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
double[] a = in1.denseBlock;
double[] b = in2.denseBlock;
final int n = in1.clen;
if( in3 != null ) //3 inputs
{
double[] c = in3.denseBlock;
for( int i=rl, ix=rl*n; i<ru; i++ )
for( int j=0; j<n; j++, ix++ ) {
double val = a[ix] * b[ix] * c[ix];
kplus.execute2( kbuff, val );
}
}
else //2 inputs (third: literal 1)
{
for( int i=rl, ix=rl*n; i<ru; i++ )
for( int j=0; j<n; j++, ix++ ) {
double val = a[ix] * b[ix];
kplus.execute2( kbuff, val );
}
}
return kbuff._sum;
}
/**
*
* @param in1
* @param in2
* @param in3
* @param rl
* @param ru
* @return
*/
private static double aggregateTernaryGeneric(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, int rl, int ru)
{
//compute block operations
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
final int n = in1.clen;
if( in1.sparse )
{
SparseRow[] a = in1.sparseRows;
for( int i=rl; i<ru; i++ )
if( a[i]!=null && !a[i].isEmpty() ) {
int alen = a[i].size();
int[] aix = a[i].getIndexContainer();
double[] avals = a[i].getValueContainer();
for( int j=0; j<alen; j++ ) {
double val1 = avals[j];
double val2 = in2.quickGetValue(i, aix[j]);
double val = val1 * val2;
if( val != 0 && in3 != null )
val *= in3.quickGetValue(i, aix[j]);
kplus.execute2( kbuff, val );
}
}
}
else //generic case
{
for( int i=rl; i<ru; i++ )
for( int j=0; j<n; j++ ){
double val1 = in1.quickGetValue(i, j);
double val2 = in2.quickGetValue(i, j);
double val = val1 * val2;
if( in3 != null )
val *= in3.quickGetValue(i, j);
kplus.execute2( kbuff, val );
}
}
return kbuff._sum;
}
/**
*
* @param in
* @param aggVal
* @param aggCorr
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixAllDense(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr)
throws DMLRuntimeException
{
if( in.denseBlock==null || in.isEmptyBlock(false) )
return;
//allocate output arrays (if required)
aggVal.allocateDenseBlock(); //should always stay in dense
aggCorr.allocateDenseBlock(); //should always stay in dense
double[] a = in.getDenseArray();
double[] c = aggVal.getDenseArray();
double[] cc = aggCorr.getDenseArray();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int len = Math.min(a.length, in.rlen*in.clen);
int nnzC = 0;
int nnzCC = 0;
for( int i=0; i<len; i++ )
{
buffer1._sum = c[i];
buffer1._correction = cc[i];
akplus.execute2(buffer1, a[i]);
c[i] = buffer1._sum;
cc[i] = buffer1._correction;
nnzC += (buffer1._sum!=0)?1:0;
nnzCC += (buffer1._correction!=0)?1:0;
}
aggVal.nonZeros = nnzC;
aggCorr.nonZeros = nnzCC;
//aggVal.examSparsity();
//aggCorr.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @param aggCorr
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixSparseDense(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr)
throws DMLRuntimeException
{
if( in.sparseRows==null || in.isEmptyBlock(false) )
return;
//allocate output arrays (if required)
aggVal.allocateDenseBlock(); //should always stay in dense
aggCorr.allocateDenseBlock(); //should always stay in dense
SparseRow[] a = in.getSparseRows();
double[] c = aggVal.getDenseArray();
double[] cc = aggCorr.getDenseArray();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int m = in.rlen;
final int n = in.clen;
final int rlen = Math.min(a.length, m);
for( int i=0, cix=0; i<rlen; i++, cix+=n )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
int[] aix = arow.getIndexContainer();
double[] avals = arow.getValueContainer();
for( int j=0; j<alen; j++ )
{
int ix = cix+aix[j];
buffer1._sum = c[ix];
buffer1._correction = cc[ix];
akplus.execute2(buffer1, avals[j]);
c[ix] = buffer1._sum;
cc[ix] = buffer1._correction;
}
}
}
aggVal.recomputeNonZeros();
aggCorr.recomputeNonZeros();
//aggVal.examSparsity();
//aggCorr.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @param aggCorr
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixSparseGeneric(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr)
throws DMLRuntimeException
{
if( in.sparseRows==null || in.isEmptyBlock(false) )
return;
SparseRow[] a = in.getSparseRows();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int m = in.rlen;
final int rlen = Math.min(a.length, m);
for( int i=0; i<rlen; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
int[] aix = arow.getIndexContainer();
double[] avals = arow.getValueContainer();
for( int j=0; j<alen; j++ )
{
int jix = aix[j];
buffer1._sum = aggVal.quickGetValue(i, jix);
buffer1._correction = aggCorr.quickGetValue(i, jix);
akplus.execute2(buffer1, avals[j]);
aggVal.quickSetValue(i, jix, buffer1._sum);
aggCorr.quickSetValue(i, jix, buffer1._correction);
}
}
}
//note: nnz of aggVal/aggCorr maintained internally
aggVal.examSparsity();
aggCorr.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @param aggCorr
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixDenseGeneric(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr)
throws DMLRuntimeException
{
if( in.denseBlock==null || in.isEmptyBlock(false) )
return;
final int m = in.rlen;
final int n = in.clen;
double[] a = in.getDenseArray();
KahanObject buffer = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
//incl implicit nnz maintenance
for(int i=0, ix=0; i<m; i++)
for(int j=0; j<n; j++, ix++)
{
buffer._sum = aggVal.quickGetValue(i, j);
buffer._correction = aggCorr.quickGetValue(i, j);
akplus.execute(buffer, a[ix]);
aggVal.quickSetValue(i, j, buffer._sum);
aggCorr.quickSetValue(i, j, buffer._correction);
}
//note: nnz of aggVal/aggCorr maintained internally
aggVal.examSparsity();
aggCorr.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixLastRowDenseGeneric(MatrixBlock in, MatrixBlock aggVal)
throws DMLRuntimeException
{
if( in.denseBlock==null || in.isEmptyBlock(false) )
return;
final int m = in.rlen;
final int n = in.clen;
final int cix = (m-1)*n;
double[] a = in.getDenseArray();
KahanObject buffer = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
//incl implicit nnz maintenance
for(int i=0, ix=0; i<m-1; i++)
for(int j=0; j<n; j++, ix++)
{
buffer._sum = aggVal.quickGetValue(i, j);
buffer._correction = aggVal.quickGetValue(m-1, j);
akplus.execute(buffer, a[ix], a[cix+j]);
aggVal.quickSetValue(i, j, buffer._sum);
aggVal.quickSetValue(m-1, j, buffer._correction);
}
//note: nnz of aggVal maintained internally
aggVal.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixLastRowSparseGeneric(MatrixBlock in, MatrixBlock aggVal)
throws DMLRuntimeException
{
//sparse-safe operation
if( in.sparseRows==null || in.isEmptyBlock(false) )
return;
SparseRow[] a = in.getSparseRows();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int m = in.rlen;
final int rlen = Math.min(a.length, m);
for( int i=0; i<rlen-1; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
int[] aix = arow.getIndexContainer();
double[] avals = arow.getValueContainer();
for( int j=0; j<alen; j++ )
{
int jix = aix[j];
double corr = in.quickGetValue(m-1, jix);
buffer1._sum = aggVal.quickGetValue(i, jix);
buffer1._correction = aggVal.quickGetValue(m-1, jix);
akplus.execute(buffer1, avals[j], corr);
aggVal.quickSetValue(i, jix, buffer1._sum);
aggVal.quickSetValue(m-1, jix, buffer1._correction);
}
}
}
//note: nnz of aggVal/aggCorr maintained internally
aggVal.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixLastColDenseGeneric(MatrixBlock in, MatrixBlock aggVal)
throws DMLRuntimeException
{
if( in.denseBlock==null || in.isEmptyBlock(false) )
return;
final int m = in.rlen;
final int n = in.clen;
double[] a = in.getDenseArray();
KahanObject buffer = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
//incl implicit nnz maintenance
for(int i=0, ix=0; i<m; i++, ix+=n)
for(int j=0; j<n-1; j++)
{
buffer._sum = aggVal.quickGetValue(i, j);
buffer._correction = aggVal.quickGetValue(i, n-1);
akplus.execute(buffer, a[ix+j], a[ix+j+1]);
aggVal.quickSetValue(i, j, buffer._sum);
aggVal.quickSetValue(i, n-1, buffer._correction);
}
//note: nnz of aggVal maintained internally
aggVal.examSparsity();
}
/**
*
* @param in
* @param aggVal
* @throws DMLRuntimeException
*/
private static void aggregateBinaryMatrixLastColSparseGeneric(MatrixBlock in, MatrixBlock aggVal)
throws DMLRuntimeException
{
//sparse-safe operation
if( in.sparseRows==null || in.isEmptyBlock(false) )
return;
SparseRow[] a = in.getSparseRows();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int m = in.rlen;
final int n = in.clen;
final int rlen = Math.min(a.length, m);
for( int i=0; i<rlen; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
int[] aix = arow.getIndexContainer();
double[] avals = arow.getValueContainer();
for( int j=0; j<alen && aix[j]<n-1; j++ )
{
int jix = aix[j];
double corr = in.quickGetValue(i, n-1);
buffer1._sum = aggVal.quickGetValue(i, jix);
buffer1._correction = aggVal.quickGetValue(i, n-1);
akplus.execute(buffer1, avals[j], corr);
aggVal.quickSetValue(i, jix, buffer1._sum);
aggVal.quickSetValue(i, n-1, buffer1._correction);
}
}
}
//note: nnz of aggVal/aggCorr maintained internally
aggVal.examSparsity();
}
/**
*
* @param in
* @param out
* @param vFn
* @param ixFn
* @throws DMLRuntimeException
*/
private static void aggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru)
throws DMLRuntimeException
{
final int m = in.rlen;
final int n = in.clen;
double[] a = in.getDenseArray();
double[] c = out.getDenseArray();
switch( optype )
{
case KAHAN_SUM: //SUM/TRACE via k+,
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) // SUM
d_uakp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWSUM
d_uarkp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLSUM
d_uackp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceDiag ) //TRACE
d_uakptrace(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
break;
}
case KAHAN_SUM_SQ: //SUM_SQ via k+,
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) //SUM_SQ
d_uasqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWSUM_SQ
d_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLSUM_SQ
d_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
break;
}
case CUM_KAHAN_SUM: //CUMSUM
{
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
d_ucumkp(a, c, m, n, kbuff, kplus);
break;
}
case CUM_PROD: //CUMPROD
{
d_ucumm(a, c, m, n);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1);
d_ucummxx(a, c, m, n, init, (Builtin)vFn);
break;
}
case MIN:
case MAX: //MAX/MIN
{
double init = Double.MAX_VALUE * ((optype==AggType.MAX)?-1:1);
if( ixFn instanceof ReduceAll ) // MIN/MAX
d_uamxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWMIN/ROWMAX
d_uarmxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLMIN/COLMAX
d_uacmxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MAX_INDEX:
{
double init = -Double.MAX_VALUE;
if( ixFn instanceof ReduceCol ) //ROWINDEXMAX
d_uarimxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MIN_INDEX:
{
double init = Double.MAX_VALUE;
if( ixFn instanceof ReduceCol ) //ROWINDEXMIN
d_uarimin(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MEAN: //MEAN
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) // MEAN
d_uamean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWMEAN
d_uarmean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLMEAN
d_uacmean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
break;
}
case PROD: //PROD
{
if( ixFn instanceof ReduceAll ) // PROD
d_uam(a, c, m, n, rl, ru );
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: "+optype);
}
}
/**
*
* @param in
* @param out
* @param vFn
* @param ixFn
* @throws DMLRuntimeException
*/
private static void aggregateUnaryMatrixSparse(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru)
throws DMLRuntimeException
{
final int m = in.rlen;
final int n = in.clen;
SparseRow[] a = in.getSparseRows();
double[] c = out.getDenseArray();
switch( optype )
{
case KAHAN_SUM: //SUM via k+
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) // SUM
s_uakp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWSUM
s_uarkp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLSUM
s_uackp(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
else if( ixFn instanceof ReduceDiag ) //TRACE
s_uakptrace(a, c, m, n, kbuff, (KahanPlus)vFn, rl, ru);
break;
}
case KAHAN_SUM_SQ: //SUM_SQ via k+
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) //SUM_SQ
s_uasqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWSUM_SQ
s_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLSUM_SQ
s_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq)vFn, rl, ru);
break;
}
case CUM_KAHAN_SUM: //CUMSUM
{
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
s_ucumkp(a, c, m, n, kbuff, kplus);
break;
}
case CUM_PROD: //CUMPROD
{
s_ucumm(a, c, m, n);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1);
s_ucummxx(a, c, m, n, init, (Builtin)vFn);
break;
}
case MIN:
case MAX: //MAX/MIN
{
double init = Double.MAX_VALUE * ((optype==AggType.MAX)?-1:1);
if( ixFn instanceof ReduceAll ) // MIN/MAX
s_uamxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWMIN/ROWMAX
s_uarmxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLMIN/COLMAX
s_uacmxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MAX_INDEX:
{
double init = -Double.MAX_VALUE;
if( ixFn instanceof ReduceCol ) //ROWINDEXMAX
s_uarimxx(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MIN_INDEX:
{
double init = Double.MAX_VALUE;
if( ixFn instanceof ReduceCol ) //ROWINDEXMAX
s_uarimin(a, c, m, n, init, (Builtin)vFn, rl, ru);
break;
}
case MEAN:
{
KahanObject kbuff = new KahanObject(0, 0);
if( ixFn instanceof ReduceAll ) // MEAN
s_uamean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
else if( ixFn instanceof ReduceCol ) //ROWMEAN
s_uarmean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
else if( ixFn instanceof ReduceRow ) //COLMEAN
s_uacmean(a, c, m, n, kbuff, (Mean)vFn, rl, ru);
break;
}
case PROD: //PROD
{
if( ixFn instanceof ReduceAll ) // PROD
s_uam(a, c, m, n, rl, ru );
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: "+optype);
}
}
/**
*
* @param in
* @param out
* @param optype
* @param ixFn
* @throws DMLRuntimeException
*/
private static void aggregateUnaryMatrixEmpty(MatrixBlock in, MatrixBlock out, AggType optype, IndexFunction ixFn)
throws DMLRuntimeException
{
//do nothing for pseudo sparse-safe operations
if(optype==AggType.KAHAN_SUM || optype==AggType.KAHAN_SUM_SQ
|| optype==AggType.MIN || optype==AggType.MAX || optype==AggType.PROD
|| optype == AggType.CUM_KAHAN_SUM || optype == AggType.CUM_PROD
|| optype == AggType.CUM_MIN || optype == AggType.CUM_MAX)
{
return;
}
//compute result based on meta data only
switch( optype )
{
case MAX_INDEX:
{
if( ixFn instanceof ReduceCol ) { //ROWINDEXMAX
for(int i=0; i<out.rlen; i++) {
out.quickSetValue(i, 0, in.clen); //maxindex
}
}
break;
}
case MIN_INDEX:
{
if( ixFn instanceof ReduceCol ) //ROWINDEXMIN
for(int i=0; i<out.rlen; i++) {
out.quickSetValue(i, 0, in.clen); //minindex
}
break;
}
case MEAN:
{
if( ixFn instanceof ReduceAll ) // MEAN
out.quickSetValue(0, 1, in.rlen*in.clen); //count
else if( ixFn instanceof ReduceCol ) //ROWMEAN
for( int i=0; i<in.rlen; i++ ) //0-sum and 0-correction
out.quickSetValue(i, 1, in.clen); //count
else if( ixFn instanceof ReduceRow ) //COLMEAN
for( int j=0; j<in.clen; j++ ) //0-sum and 0-correction
out.quickSetValue(1, j, in.rlen); //count
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: "+optype);
}
}
////////////////////////////////////////////
// core aggregation functions //
////////////////////////////////////////////
/**
* SUM, opcode: uak+, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uakp( double[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
int len = Math.min((ru-rl)*n, a.length);
sum( a, rl*n, len, kbuff, kplus );
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* ROWSUM, opcode: uark+, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uarkp( double[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
for( int i=rl, aix=rl*n, cix=rl*2; i<ru; i++, aix+=n, cix+=2 )
{
kbuff.set(0, 0); //reset buffer
sum( a, aix, n, kbuff, kplus );
c[cix+0] = kbuff._sum;
c[cix+1] = kbuff._correction;
}
}
/**
* COLSUM, opcode: uack+, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uackp( double[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
for( int i=rl, aix=rl*n; i<ru; i++, aix+=n )
sumAgg( a, c, aix, 0, n, kbuff, kplus );
}
/**
* SUM_SQ, opcode: uasqk+, dense input.
*
* @param a Array of values to square & sum.
* @param c Output array to store sum and correction factor.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void d_uasqkp(double[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru)
{
int len = Math.min((ru-rl)*n, a.length);
sumSq(a, rl*n, len, kbuff, kplusSq);
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* ROWSUM_SQ, opcode: uarsqk+, dense input.
*
* @param a Array of values to square & sum row-wise.
* @param c Output array to store sum and correction factor
* for each row.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void d_uarsqkp(double[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru)
{
for (int i=rl, aix=rl*n, cix=rl*2; i<ru; i++, aix+=n, cix+=2) {
kbuff.set(0, 0); //reset buffer
sumSq(a, aix, n, kbuff, kplusSq);
c[cix+0] = kbuff._sum;
c[cix+1] = kbuff._correction;
}
}
/**
* COLSUM_SQ, opcode: uacsqk+, dense input.
*
* @param a Array of values to square & sum column-wise.
* @param c Output array to store sum and correction factor
* for each column.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void d_uacsqkp(double[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru)
{
for (int i=rl, aix=rl*n; i<ru; i++, aix+=n)
sumSqAgg(a, c, aix, 0, n, kbuff, kplusSq);
}
/**
* CUMSUM, opcode: ucumk+, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_ucumkp( double[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus )
{
//init current row sum/correction arrays w/ neutral 0
double[] csums = new double[ 2*n ];
Arrays.fill(csums, 0);
//scan once and compute prefix sums
for( int i=0, aix=0; i<m; i++, aix+=n ) {
sumAgg( a, csums, aix, 0, n, kbuff, kplus );
System.arraycopy(csums, 0, c, aix, n);
}
}
/**
* CUMPROD, opcode: ucum*, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_ucumm( double[] a, double[] c, int m, int n )
{
//init current row product array w/ neutral 1
double[] cprods = new double[ n ];
Arrays.fill(cprods, 1);
//scan once and compute prefix products
for( int i=0, aix=0; i<m; i++, aix+=n ) {
productAgg( a, cprods, aix, 0, n );
System.arraycopy(cprods, 0, c, aix, n);
}
}
/**
* CUMMIN/CUMMAX, opcode: ucummin/ucummax, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param builtin
*/
private static void d_ucummxx( double[] a, double[] c, int m, int n, double init, Builtin builtin )
{
//init current row min/max array w/ extreme value
double[] cmxx = new double[ n ];
Arrays.fill(cmxx, init);
//scan once and compute prefix min/max
for( int i=0, aix=0; i<m; i++, aix+=n ) {
builtinAgg( a, cmxx, aix, n, builtin );
System.arraycopy(cmxx, 0, c, aix, n);
}
}
/**
* TRACE, opcode: uaktrace
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uakptrace( double[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
//aggregate diag (via ix=n+1)
for( int i=rl, aix=rl*n+rl; i<ru; i++, aix+=(n+1) )
kplus.execute2(kbuff, a[ aix ]);
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* MIN/MAX, opcode: uamin/uamax, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uamxx( double[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
int len = Math.min((ru-rl)*n, a.length);
c[0] = builtin(a, rl*n, init, len, builtin);
}
/**
* ROWMIN/ROWMAX, opcode: uarmin/uarmax, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param builtin
*/
private static void d_uarmxx( double[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
for( int i=rl, aix=rl*n; i<ru; i++, aix+=n )
c[i] = builtin(a, aix, init, n, builtin);
}
/**
* COLMIN/COLMAX, opcode: uacmin/uacmax, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param builtin
*/
private static void d_uacmxx( double[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
//init output (base for incremental agg)
Arrays.fill(c, init);
//execute builtin aggregate
for( int i=rl, aix=rl*n; i<ru; i++, aix+=n )
builtinAgg( a, c, aix, n, builtin );
}
/**
* ROWINDEXMAX, opcode: uarimax, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void d_uarimxx( double[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
for( int i=rl, aix=rl*n, cix=rl*2; i<ru; i++, aix+=n, cix+=2 )
{
int maxindex = indexmax(a, aix, init, n, builtin);
c[cix+0] = (double)maxindex + 1;
c[cix+1] = a[aix+maxindex]; //max value
}
}
/**
* ROWINDEXMIN, opcode: uarimin, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void d_uarimin( double[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
for( int i=rl, aix=rl*n, cix=rl*2; i<ru; i++, aix+=n, cix+=2 )
{
int minindex = indexmin(a, aix, init, n, builtin);
c[cix+0] = (double)minindex + 1;
c[cix+1] = a[aix+minindex]; //min value
}
}
/**
* MEAN, opcode: uamean, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uamean( double[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
int len = Math.min((ru-rl)*n, a.length);
mean(a, rl*n, len, 0, kbuff, kmean);
c[0] = kbuff._sum;
c[1] = len;
c[2] = kbuff._correction;
}
/**
* ROWMEAN, opcode: uarmean, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void d_uarmean( double[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
for( int i=rl, aix=rl*n, cix=rl*3; i<ru; i++, aix+=n, cix+=3 )
{
kbuff.set(0, 0); //reset buffer
mean(a, aix, n, 0, kbuff, kmean);
c[cix+0] = kbuff._sum;
c[cix+1] = n;
c[cix+2] = kbuff._correction;
}
}
/**
* COLMEAN, opcode: uacmean, dense input.
*
* @param a
* @param c
* @param m
* @param n
* @param builtin
*/
private static void d_uacmean( double[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
//init output (base for incremental agg)
Arrays.fill(c, 0); //including counts
//execute builtin aggregate
for( int i=rl, aix=rl*n; i<ru; i++, aix+=n )
meanAgg( a, c, aix, 0, n, kbuff, kmean );
}
/**
* PROD, opcode: ua*, dense input.
*
* @param a
* @param c
* @param m
* @param n
*/
private static void d_uam( double[] a, double[] c, int m, int n, int rl, int ru )
{
int len = Math.min((ru-rl)*n, a.length);
c[0] = product( a, rl*n, len );
}
/**
* SUM, opcode: uak+, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uakp( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
sum(avals, 0, alen, kbuff, kplus);
}
}
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* ROWSUM, opcode: uark+, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uarkp( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
//compute row aggregates
for( int i=rl, cix=rl*2; i<ru; i++, cix+=2 )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
kbuff.set(0, 0); //reset buffer
sum( avals, 0, alen, kbuff, kplus );
c[cix+0] = kbuff._sum;
c[cix+1] = kbuff._correction;
}
}
}
/**
* COLSUM, opcode: uack+, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uackp( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
//init result (for empty columns)
Arrays.fill(c, 0);
//compute column aggregates
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
sumAgg( avals, c, aix, alen, n, kbuff, kplus );
}
}
}
/**
* SUM_SQ, opcode: uasqk+, sparse input.
*
* @param a Sparse array of values to square & sum.
* @param c Output array to store sum and correction factor.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void s_uasqkp(SparseRow[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru )
{
for (int i=rl; i<ru; i++) {
SparseRow arow = a[i];
if (arow!=null && !arow.isEmpty()) {
int alen = arow.size();
double[] avals = arow.getValueContainer();
sumSq(avals, 0, alen, kbuff, kplusSq);
}
}
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* ROWSUM_SQ, opcode: uarsqk+, sparse input.
*
* @param a Sparse array of values to square & sum row-wise.
* @param c Output array to store sum and correction factor
* for each row.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void s_uarsqkp(SparseRow[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru )
{
//compute row aggregates
for (int i=rl, cix=rl*2; i<ru; i++, cix+=2) {
SparseRow arow = a[i];
if (arow!=null && !arow.isEmpty()) {
int alen = arow.size();
double[] avals = arow.getValueContainer();
kbuff.set(0, 0); //reset buffer
sumSq(avals, 0, alen, kbuff, kplusSq);
c[cix+0] = kbuff._sum;
c[cix+1] = kbuff._correction;
}
}
}
/**
* COLSUM_SQ, opcode: uacsqk+, sparse input.
*
* @param a Sparse array of values to square & sum column-wise.
* @param c Output array to store sum and correction factor
* for each column.
* @param m Number of rows.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kplusSq A KahanPlusSq object to perform summation of
* squared values.
* @param rl Lower row limit.
* @param ru Upper row limit.
*/
private static void s_uacsqkp(SparseRow[] a, double[] c, int m, int n, KahanObject kbuff,
KahanPlusSq kplusSq, int rl, int ru )
{
//init result (for empty columns)
Arrays.fill(c, 0);
//compute column aggregates
for (int i=rl; i<ru; i++) {
SparseRow arow = a[i];
if (arow!=null && !arow.isEmpty()) {
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
sumSqAgg(avals, c, aix, alen, n, kbuff, kplusSq);
}
}
}
/**
* CUMSUM, opcode: ucumk+, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_ucumkp( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus )
{
//init current row sum/correction arrays w/ neutral 0
double[] csums = new double[ 2*n ];
Arrays.fill(csums, 0);
//scan once and compute prefix sums
for( int i=0, ix=0; i<m; i++, ix+=n )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
sumAgg( avals, csums, aix, alen, n, kbuff, kplus );
}
//always copy current sum (not sparse-safe)
System.arraycopy(csums, 0, c, ix, n);
}
}
/**
* CUMPROD, opcode: ucum*, sparse input.
*
* @param a
* @param c
* @param m
* @param n
*/
private static void s_ucumm( SparseRow[] a, double[] c, int m, int n )
{
//init current row prod arrays w/ neutral 1
double[] cprod = new double[ n ];
Arrays.fill(cprod, 1);
//init count arrays (helper, see correction)
int[] cnt = new int[ n ];
Arrays.fill(cnt, 0); //init count array
//scan once and compute prefix products
for( int i=0, ix=0; i<m; i++, ix+=n )
{
SparseRow arow = a[i];
//multiply row of non-zero elements
if( arow!=null && !arow.isEmpty() ) {
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
productAgg( avals, cprod, aix, 0, alen );
countAgg( avals, cnt, aix, alen );
}
//correction (not sparse-safe and cumulative)
//note: we need to determine if there are only nnz in a column
for( int j=0; j<n; j++ )
if( cnt[j] < i+1 ) //no dense column
cprod[j] *= 0;
//always copy current sum (not sparse-safe)
System.arraycopy(cprod, 0, c, ix, n);
}
}
/**
* CUMMIN/CUMMAX, opcode: ucummin/ucummax, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_ucummxx( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin )
{
//init current row min/max array w/ extreme value
double[] cmxx = new double[ n ];
Arrays.fill(cmxx, init);
//init count arrays (helper, see correction)
int[] cnt = new int[ n ];
Arrays.fill(cnt, 0); //init count array
//compute column aggregates min/max
for( int i=0, ix=0; i<m; i++, ix+=n )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
builtinAgg( avals, cmxx, aix, alen, builtin );
countAgg( avals, cnt, aix, alen );
}
//correction (not sparse-safe and cumulative)
//note: we need to determine if there are only nnz in a column
for( int j=0; j<n; j++ )
if( cnt[j] < i+1 ) //no dense column
cmxx[j] = builtin.execute2(cmxx[j], 0);
//always copy current sum (not sparse-safe)
System.arraycopy(cmxx, 0, c, ix, n);
}
}
/**
* TRACE, opcode: uaktrace, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uakptrace( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, KahanPlus kplus, int rl, int ru )
{
for( int i=rl; i<ru; i++ ) {
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
double val = arow.get( i );
kplus.execute2(kbuff, val);
}
}
c[0] = kbuff._sum;
c[1] = kbuff._correction;
}
/**
* MIN/MAX, opcode: uamin/uamax, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_uamxx( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
double ret = init; //keep init val
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
double lval = builtin(avals, 0, init, alen, builtin);
ret = builtin.execute2(ret, lval);
}
//correction (not sparse-safe)
if( arow==null || arow.size()<n )
ret = builtin.execute2(ret, 0);
}
c[0] = ret;
}
/**
* ROWMIN/ROWMAX, opcode: uarmin/uarmax, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_uarmxx( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
//init result (for empty rows)
Arrays.fill(c, rl, ru, init); //not sparse-safe
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
c[ i ] = builtin(avals, 0, init, alen, builtin);
}
//correction (not sparse-safe)
if( arow==null || arow.size()<n )
c[ i ] = builtin.execute2(c[ i ], 0);
}
}
/**
* COLMIN/COLMAX, opcode: uacmin/uacmax, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_uacmxx( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
//init output (base for incremental agg)
Arrays.fill(c, init);
//init count arrays (helper, see correction)
int[] cnt = new int[ n ];
Arrays.fill(cnt, 0); //init count array
//compute column aggregates min/max
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
builtinAgg( avals, c, aix, alen, builtin );
countAgg( avals, cnt, aix, alen );
}
}
//correction (not sparse-safe)
//note: we need to determine if there are only nnz in a column
// in order to know if for example a colMax of -8 is true or need
// to be replaced with a 0 because there was a missing nonzero.
for( int i=0; i<n; i++ )
if( cnt[i] < m ) //no dense column
c[i] = builtin.execute2(c[i], 0);
}
/**
* ROWINDEXMAX, opcode: uarimax, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_uarimxx( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
for( int i=rl, cix=rl*2; i<ru; i++, cix+=2 )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
int maxindex = indexmax(avals, 0, init, alen, builtin);
c[cix+0] = (double)aix[maxindex] + 1;
c[cix+1] = avals[maxindex]; //max value
//correction (not sparse-safe)
if(alen < n && (builtin.execute2( 0, c[cix+1] ) == 1))
{
int ix = n-1; //find last 0 value
for( int j=alen-1; j>=0; j--, ix-- )
if( aix[j]!=ix )
break;
c[cix+0] = ix + 1; //max index (last)
c[cix+1] = 0; //max value
}
}
else //if( arow==null )
{
//correction (not sparse-safe)
c[cix+0] = n; //max index (last)
c[cix+1] = 0; //max value
}
}
}
/**
* ROWINDEXMIN, opcode: uarimin, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param builtin
*/
private static void s_uarimin( SparseRow[] a, double[] c, int m, int n, double init, Builtin builtin, int rl, int ru )
{
for( int i=rl, cix=rl*2; i<ru; i++, cix+=2 )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
int minindex = indexmin(avals, 0, init, alen, builtin);
c[cix+0] = (double)aix[minindex] + 1;
c[cix+1] = avals[minindex]; //min value among non-zeros
//correction (not sparse-safe)
if(alen < n && (builtin.execute2( 0, c[cix+1] ) == 1))
{
int ix = n-1; //find last 0 value
for( int j=alen-1; j>=0; j--, ix-- )
if( aix[j]!=ix )
break;
c[cix+0] = ix + 1; //min index (last)
c[cix+1] = 0; //min value
}
}
else //if( arow==null )
{
//correction (not sparse-safe)
c[cix+0] = n; //min index (last)
c[cix+1] = 0; //min value
}
}
}
/**
* MEAN, opcode: uamean, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uamean( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
int len = (ru-rl) * n;
int count = 0;
//correction remaining tuples (not sparse-safe)
//note: before aggregate computation in order to
//exploit 0 sum (noop) and better numerical stability
for( int i=rl; i<ru; i++ )
count += (a[i]==null)? n : n-a[i].size();
//compute aggregate mean
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
mean(avals, 0, alen, count, kbuff, kmean);
count += alen;
}
}
//OLD VERSION: correction remaining tuples (not sparse-safe)
//mean(0, len-count, count, kbuff, kplus);
c[0] = kbuff._sum;
c[1] = len;
c[2] = kbuff._correction;
}
/**
* ROWMEAN, opcode: uarmean, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param kbuff
* @param kplus
*/
private static void s_uarmean( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
for( int i=rl, cix=rl*3; i<ru; i++, cix+=3 )
{
//correction remaining tuples (not sparse-safe)
//note: before aggregate computation in order to
//exploit 0 sum (noop) and better numerical stability
int count = (a[i]==null)? n : n-a[i].size();
kbuff.set(0, 0); //reset buffer
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
mean(avals, 0, alen, count, kbuff, kmean);
}
//OLD VERSION: correction remaining tuples (not sparse-safe)
//int count = ((arow==null) ? 0 : arow.size());
//mean(0, n-count, count, kbuff, kplus);
c[cix+0] = kbuff._sum;
c[cix+1] = n;
c[cix+2] = kbuff._correction;
}
}
/**
* COLMEAN, opcode: uacmean, sparse input.
*
* @param a
* @param c
* @param m
* @param n
* @param init
* @param kbuff
* @param kplus
*/
private static void s_uacmean( SparseRow[] a, double[] c, int m, int n, KahanObject kbuff, Mean kmean, int rl, int ru )
{
//init output (base for incremental agg)
Arrays.fill(c, 0);
//correction remaining tuples (not sparse-safe)
//note: before aggregate computation in order to
//exploit 0 sum (noop) and better numerical stability
Arrays.fill(c, n, n*2, ru-rl);
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
countDisAgg( avals, c, aix, n, alen );
}
}
//compute column aggregate means
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
int[] aix = arow.getIndexContainer();
meanAgg( avals, c, aix, alen, n, kbuff, kmean );
}
}
}
/**
* PROD, opcode: ua*, sparse input.
*
* @param a
* @param c
* @param m
* @param n
*/
private static void s_uam( SparseRow[] a, double[] c, int m, int n, int rl, int ru )
{
double ret = 1;
for( int i=rl; i<ru; i++ )
{
SparseRow arow = a[i];
if( arow!=null && !arow.isEmpty() )
{
int alen = arow.size();
double[] avals = arow.getValueContainer();
ret *= product(avals, 0, alen);
ret *= (alen<n) ? 0 : 1;
}
//early abort (note: in case of NaNs this is an invalid optimization)
if( !NAN_AWARENESS && ret==0 ) break;
}
c[0] = ret;
}
////////////////////////////////////////////
// performance-relevant utility functions //
////////////////////////////////////////////
/**
* Summation using the Kahan summation algorithm with the
* KahanPlus function.
*/
private static void sum(double[] a, int ai, final int len, KahanObject kbuff, KahanPlus kplus)
{
sumWithFn(a, ai, len, kbuff, kplus);
}
/**
* Aggregated summation using the Kahan summation algorithm with
* the KahanPlus function.
*/
private static void sumAgg(double[] a, double[] c, int ai, int ci, final int len,
KahanObject kbuff, KahanPlus kplus)
{
sumAggWithFn(a, c, ai, ci, len, kbuff, kplus);
}
/**
* Aggregated summation using the Kahan summation algorithm with
* the KahanPlus function.
*/
private static void sumAgg(double[] a, double[] c, int[] ai, final int len, final int n,
KahanObject kbuff, KahanPlus kplus)
{
sumAggWithFn(a, c, ai, len, n, kbuff, kplus);
}
/**
* Summation of squared values using the Kahan summation algorithm
* with the KahanPlusSq function.
*/
private static void sumSq(double[] a, int ai, final int len,
KahanObject kbuff, KahanPlusSq kplusSq)
{
sumWithFn(a, ai, len, kbuff, kplusSq);
}
/**
* Aggregated summation of squared values using the Kahan
* summation algorithm with the KahanPlusSq function.
*/
private static void sumSqAgg(double[] a, double[] c, int ai, int ci, final int len,
KahanObject kbuff, KahanPlusSq kplusSq)
{
sumAggWithFn(a, c, ai, ci, len, kbuff, kplusSq);
}
/**
* Aggregated summation of squared values using the Kahan
* summation algorithm with the KahanPlusSq function.
*/
private static void sumSqAgg(double[] a, double[] c, int[] ai, final int len, final int n,
KahanObject kbuff, KahanPlusSq kplusSq)
{
sumAggWithFn(a, c, ai, len, n, kbuff, kplusSq);
}
/**
* Summation using the Kahan summation algorithm with one of the
* Kahan functions.
*
* @param a Array of values to sum.
* @param ai Index at which to start processing.
* @param len Number of values to process, starting at index ai.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kfunc A KahanFunction object to perform the summation.
*/
private static void sumWithFn(double[] a, int ai, final int len,
KahanObject kbuff, KahanFunction kfunc)
{
for (int i=0; i<len; i++, ai++)
kfunc.execute2(kbuff, a[ai]);
}
/**
* Aggregated summation using the Kahan summation algorithm
* with one of the Kahan functions.
*
* @param a Array of values to sum.
* @param c Output array to store aggregated sum and correction
* factors.
* @param ai Index at which to start processing array `a`.
* @param ci Index at which to start storing aggregated results
* into array `c`.
* @param len Number of values to process, starting at index ai.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kfunc A KahanFunction object to perform the summation.
*/
private static void sumAggWithFn(double[] a, double[] c, int ai, int ci, final int len,
KahanObject kbuff, KahanFunction kfunc)
{
for (int i=0; i<len; i++, ai++, ci++) {
kbuff._sum = c[ci];
kbuff._correction = c[ci+len];
kfunc.execute2(kbuff, a[ai]);
c[ci] = kbuff._sum;
c[ci+len] = kbuff._correction;
}
}
/**
* Aggregated summation using the Kahan summation algorithm
* with one of the Kahan functions.
*
* @param a Array of values to sum.
* @param c Output array to store aggregated sum and correction
* factors.
* @param ai Array of indices to process for array `a`.
* @param len Number of indices in `ai` to process.
* @param n Number of values per row.
* @param kbuff A KahanObject to hold the current sum and
* correction factor for the Kahan summation
* algorithm.
* @param kfunc A KahanFunction object to perform the summation.
*/
private static void sumAggWithFn(double[] a, double[] c, int[] ai, final int len, final int n,
KahanObject kbuff, KahanFunction kfunc)
{
for (int i=0; i<len; i++) {
kbuff._sum = c[ai[i]];
kbuff._correction = c[ai[i]+n];
kfunc.execute2(kbuff, a[i]);
c[ai[i]] = kbuff._sum;
c[ai[i]+n] = kbuff._correction;
}
}
/**
*
* @param a
* @param len
* @return
*/
private static double product( double[] a, int ai, final int len )
{
double val = 1;
if( NAN_AWARENESS )
{
//product without early abort
//even if val is 0, it might turn into NaN.
for( int i=0; i<len; i++, ai++ )
val *= a[ ai ];
}
else
{
//product with early abort (if 0)
//note: this will not work with NaNs (invalid optimization)
for( int i=0; i<len && val!=0; i++, ai++ )
val *= a[ ai ];
}
return val;
}
/**
*
* @param a
* @param c
* @param ai
* @param ci
* @param len
*/
private static void productAgg( double[] a, double[] c, int ai, int ci, final int len )
{
//always w/ NAN_AWARENESS: product without early abort;
//even if val is 0, it might turn into NaN.
//(early abort would require column-flags and branches)
for( int i=0; i<len; i++, ai++, ci++ )
c[ ci ] *= a[ ai ];
}
/**
*
* @param a
* @param c
* @param ai
* @param ci
* @param len
*/
private static void productAgg( double[] a, double[] c, int[] ai, int ci, final int len )
{
//always w/ NAN_AWARENESS: product without early abort;
//even if val is 0, it might turn into NaN.
//(early abort would require column-flags and branches)
for( int i=0; i<len; i++ )
c[ ci + ai[i] ] *= a[ i ];
}
/**
*
* @param a
* @param ai
* @param len
* @param kbuff
* @param kplus
*/
private static void mean( double[] a, int ai, final int len, int count, KahanObject kbuff, Mean mean )
{
for( int i=0; i<len; i++, ai++, count++ )
{
//delta: (newvalue-buffer._sum)/count
mean.execute2(kbuff, a[ai], count+1);
}
}
/*
private static void mean( final double aval, final int len, int count, KahanObject kbuff, KahanPlus kplus )
{
for( int i=0; i<len; i++, count++ )
{
//delta: (newvalue-buffer._sum)/count
kplus.execute2(kbuff, (aval-kbuff._sum)/(count+1));
}
}
*/
/**
*
* @param a
* @param c
* @param ai
* @param ci
* @param len
* @param kbuff
* @param kplus
*/
private static void meanAgg( double[] a, double[] c, int ai, int ci, final int len, KahanObject kbuff, Mean mean )
{
for( int i=0; i<len; i++, ai++, ci++ )
{
kbuff._sum = c[ci];
double count = c[ci+len] + 1;
kbuff._correction = c[ci+2*len];
mean.execute2(kbuff, a[ai], count);
c[ci] = kbuff._sum;
c[ci+len] = count;
c[ci+2*len] = kbuff._correction;
}
}
/**
*
* @param a
* @param c
* @param ai
* @param len
* @param kbuff
* @param kplus
*/
private static void meanAgg( double[] a, double[] c, int[] ai, final int len, final int n, KahanObject kbuff, Mean mean )
{
for( int i=0; i<len; i++ )
{
kbuff._sum = c[ai[i]];
double count = c[ai[i]+n] + 1;
kbuff._correction = c[ai[i]+2*n];
mean.execute2(kbuff, a[ i ], count);
c[ai[i]] = kbuff._sum;
c[ai[i]+n] = count;
c[ai[i]+2*n] = kbuff._correction;
}
}
/**
* Meant for builtin function ops (min, max)
*
* @param a
* @param ai
* @param init
* @param len
* @param aggop
* @return
* @throws DMLRuntimeException
*/
private static double builtin( double[] a, int ai, final double init, final int len, Builtin aggop )
{
double val = init;
for( int i=0; i<len; i++, ai++ )
val = aggop.execute2( val, a[ ai ] );
return val;
}
/**
*
* @param a
* @param c
* @param ai
* @param len
* @param aggop
*/
private static void builtinAgg( double[] a, double[] c, int ai, final int len, Builtin aggop )
{
for( int i=0; i<len; i++, ai++ )
c[ i ] = aggop.execute2( c[ i ], a[ ai ] );
}
/**
*
* @param a
* @param c
* @param ai
* @param len
* @param aggop
*/
private static void builtinAgg( double[] a, double[] c, int[] ai, final int len, Builtin aggop )
{
for( int i=0; i<len; i++ )
c[ ai[i] ] = aggop.execute2( c[ ai[i] ], a[ i ] );
}
/**
*
* @param a
* @param ai
* @param init
* @param len
* @param aggop
* @return
*/
private static int indexmax( double[] a, int ai, final double init, final int len, Builtin aggop )
{
double maxval = init;
int maxindex = -1;
for( int i=ai; i<ai+len; i++ ) {
maxindex = (a[i]>=maxval) ? i-ai : maxindex;
maxval = (a[i]>=maxval) ? a[i] : maxval;
}
return maxindex;
}
/**
*
* @param a
* @param ai
* @param init
* @param len
* @param aggop
* @return
*/
private static int indexmin( double[] a, int ai, final double init, final int len, Builtin aggop )
{
double minval = init;
int minindex = -1;
for( int i=ai; i<ai+len; i++ ) {
minindex = (a[i]<=minval) ? i-ai : minindex;
minval = (a[i]<=minval) ? a[i] : minval;
}
return minindex;
}
/**
*
* @param a
* @param c
* @param ai
* @param len
*/
private static void countAgg( double[] a, int[] c, int[] ai, final int len )
{
final int bn = len%8;
//compute rest, not aligned to 8-block
for( int i=0; i<bn; i++ )
c[ ai[i] ]++;
//unrolled 8-block (for better instruction level parallelism)
for( int i=bn; i<len; i+=8 )
{
c[ ai[ i+0 ] ] ++;
c[ ai[ i+1 ] ] ++;
c[ ai[ i+2 ] ] ++;
c[ ai[ i+3 ] ] ++;
c[ ai[ i+4 ] ] ++;
c[ ai[ i+5 ] ] ++;
c[ ai[ i+6 ] ] ++;
c[ ai[ i+7 ] ] ++;
}
}
private static void countDisAgg( double[] a, double[] c, int[] ai, final int ci, final int len )
{
final int bn = len%8;
//compute rest, not aligned to 8-block
for( int i=0; i<bn; i++ )
c[ ci+ai[i] ]--;
//unrolled 8-block (for better instruction level parallelism)
for( int i=bn; i<len; i+=8 )
{
c[ ci+ai[ i+0 ] ] --;
c[ ci+ai[ i+1 ] ] --;
c[ ci+ai[ i+2 ] ] --;
c[ ci+ai[ i+3 ] ] --;
c[ ci+ai[ i+4 ] ] --;
c[ ci+ai[ i+5 ] ] --;
c[ ci+ai[ i+6 ] ] --;
c[ ci+ai[ i+7 ] ] --;
}
}
/////////////////////////////////////////////////////////
// Task Implementations for Multi-Threaded Operations //
/////////////////////////////////////////////////////////
private static abstract class AggTask implements Callable<Object> {}
/**
*
*
*/
private static class RowAggTask extends AggTask
{
private MatrixBlock _in = null;
private MatrixBlock _ret = null;
private AggType _aggtype = null;
private AggregateUnaryOperator _uaop = null;
private int _rl = -1;
private int _ru = -1;
protected RowAggTask( MatrixBlock in, MatrixBlock ret, AggType aggtype, AggregateUnaryOperator uaop, int rl, int ru )
{
_in = in;
_ret = ret;
_aggtype = aggtype;
_uaop = uaop;
_rl = rl;
_ru = ru;
}
@Override
public Object call() throws DMLRuntimeException
{
if( !_in.sparse )
aggregateUnaryMatrixDense(_in, _ret, _aggtype, _uaop.aggOp.increOp.fn, _uaop.indexFn, _rl, _ru);
else
aggregateUnaryMatrixSparse(_in, _ret, _aggtype, _uaop.aggOp.increOp.fn, _uaop.indexFn, _rl, _ru);
return null;
}
}
/**
*
*
*/
private static class PartialAggTask extends AggTask
{
private MatrixBlock _in = null;
private MatrixBlock _ret = null;
private AggType _aggtype = null;
private AggregateUnaryOperator _uaop = null;
private int _rl = -1;
private int _ru = -1;
protected PartialAggTask( MatrixBlock in, MatrixBlock ret, AggType aggtype, AggregateUnaryOperator uaop, int rl, int ru )
throws DMLRuntimeException
{
_in = in;
_aggtype = aggtype;
_uaop = uaop;
_rl = rl;
_ru = ru;
//allocate local result for partial aggregation
_ret = new MatrixBlock(ret.rlen, ret.clen, false);
_ret.allocateDenseBlock();
}
@Override
public Object call() throws DMLRuntimeException
{
if( !_in.sparse )
aggregateUnaryMatrixDense(_in, _ret, _aggtype, _uaop.aggOp.increOp.fn, _uaop.indexFn, _rl, _ru);
else
aggregateUnaryMatrixSparse(_in, _ret, _aggtype, _uaop.aggOp.increOp.fn, _uaop.indexFn, _rl, _ru);
//recompute non-zeros of partial result
_ret.recomputeNonZeros();
return null;
}
public MatrixBlock getResult() {
return _ret;
}
}
/**
*
*/
private static class AggTernaryTask extends AggTask
{
private MatrixBlock _in1 = null;
private MatrixBlock _in2 = null;
private MatrixBlock _in3 = null;
private double _ret = -1;
private int _rl = -1;
private int _ru = -1;
protected AggTernaryTask( MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, int rl, int ru )
throws DMLRuntimeException
{
_in1 = in1;
_in2 = in2;
_in3 = in3;
_rl = rl;
_ru = ru;
}
@Override
public Object call() throws DMLRuntimeException
{
if( !_in1.sparse && !_in2.sparse && (_in3==null||!_in3.sparse) ) //DENSE
_ret = aggregateTernaryDense(_in1, _in2, _in3, _rl, _ru);
else //GENERAL CASE
_ret = aggregateTernaryGeneric(_in1, _in2, _in3, _rl, _ru);
return null;
}
public double getResult() {
return _ret;
}
}
}