/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.AdaptivePartialBlock;
import org.apache.sysml.runtime.matrix.data.IJV;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.PartialBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlock.Type;
import org.apache.sysml.runtime.matrix.data.TaggedAdaptivePartialBlock;
import org.apache.sysml.runtime.util.UtilFunctions;
/**
 * Fixed-capacity buffer of individual matrix cells that are grouped into
 * blocks on flush.
 *
 * Cells are stored as {@code <row index, column index, raw double bits>}
 * triples in a pure long array so that the buffer can be sorted by block
 * index on flush. On flush, the cells are emitted either to a Hadoop
 * {@link OutputCollector} (as partial binary blocks or single binary cells)
 * or into a list of in-memory binary blocks.
 *
 * NOTE(review): the block dimension arithmetic {@code len - (blockIndex-1)*blen}
 * implies 1-based block indexes; cell indexes are presumably 1-based as well -
 * confirm against UtilFunctions.computeBlockIndex/computeCellInBlock.
 */
public class ReblockBuffer
{
    //default buffer size: 5M -> 5M * 3x8B = 120MB
    public static final int DEFAULT_BUFFER_SIZE = 5000000;

    //buffer <long rowindex, long colindex, long value>
    //(pure long buffer for sort on flush)
    private long[][] _buff = null;
    private int _bufflen = -1;
    private int _count = -1;
    private long _rlen = -1;
    private long _clen = -1;
    private int _brlen = -1;
    private int _bclen = -1;

    /**
     * Creates an empty buffer and eagerly allocates all cell slots.
     *
     * @param buffersize maximum number of buffered cells
     * @param rlen number of rows of the overall matrix
     * @param clen number of columns of the overall matrix
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     */
    public ReblockBuffer( int buffersize, long rlen, long clen, int brlen, int bclen )
    {
        _bufflen = buffersize;
        _count = 0;
        _buff = new long[ _bufflen ][3];
        _rlen = rlen;
        _clen = clen;
        _brlen = brlen;
        _bclen = bclen;
    }

    /**
     * Appends a single cell to the buffer without flushing.
     *
     * The caller is responsible for flushing before the capacity is
     * exceeded (see {@link #getSize()} and {@link #getCapacity()});
     * appending to a full buffer fails with an
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param r global row index of the cell
     * @param c global column index of the cell
     * @param v cell value
     */
    public void appendCell( long r, long c, double v )
    {
        putCell(r, c, v);
    }

    /**
     * Appends all non-zero cells of the given block to the buffer, shifted by
     * the given offsets, and flushes the buffer to the output collector
     * whenever it becomes full.
     *
     * @param r_offset offset added to each in-block row position
     * @param c_offset offset added to each in-block column position
     * @param inBlk input block whose cells are appended
     * @param index tag passed through to {@link #flushBuffer(byte, OutputCollector)}
     * @param out output collector used for intermediate flushes
     * @throws IOException if an intermediate flush fails
     */
    public void appendBlock(long r_offset, long c_offset, MatrixBlock inBlk, byte index, OutputCollector<Writable, Writable> out )
        throws IOException
    {
        if( inBlk.isInSparseFormat() ) //SPARSE
        {
            Iterator<IJV> iter = inBlk.getSparseBlockIterator();
            while( iter.hasNext() )
            {
                IJV cell = iter.next();
                putCell(r_offset + cell.getI(), c_offset + cell.getJ(), cell.getV());

                //check and flush if required
                if( _count == _bufflen )
                    flushBuffer(index, out);
            }
        }
        else //DENSE
        {
            int rlen = inBlk.getNumRows();
            int clen = inBlk.getNumColumns();
            for( int i=0; i<rlen; i++ )
                for( int j=0; j<clen; j++ )
                {
                    double val = inBlk.getValueDenseUnsafe(i, j);
                    if( val == 0 )
                        continue; //only non-zero cells are buffered

                    putCell(r_offset + i, c_offset + j, val);

                    //check and flush if required
                    if( _count == _bufflen )
                        flushBuffer(index, out);
                }
        }
    }

    /**
     * @return number of cells currently held in the buffer
     */
    public int getSize()
    {
        return _count;
    }

    /**
     * @return maximum number of cells the buffer can hold
     */
    public int getCapacity()
    {
        return _bufflen;
    }

    /**
     * Sorts the buffered cells by block index and writes them to the given
     * output collector, either as one partial block per touched block or as
     * one record per cell, depending on which representation is estimated
     * to be smaller. The buffer is empty afterwards; an empty buffer is a no-op.
     *
     * @param index tag set on every emitted value
     * @param out output collector receiving the block/cell records
     * @throws IOException if the output collector fails
     */
    public void flushBuffer( byte index, OutputCollector<Writable, Writable> out )
        throws IOException
    {
        if( _count == 0 )
            return;

        //Step 1) sort reblock buffer (blockwise, no in-block sorting!)
        Arrays.sort( _buff, 0 ,_count, new ReblockBufferComparator() );

        //Step 2) scan for number of created blocks
        long numBlocks = countBlocks();

        //Step 3) decide on intermediate representation (for entire buffer)
        //decision based on binarycell vs binaryblock_ultrasparse (worstcase);
        //computed in long arithmetic to avoid int overflow for large buffers
        long blockedSize = 16L * numBlocks + 16L * _count; //<long,long>,#<int,int,double>
        long cellSize = 24L * _count; //#<long,long>,<double>
        boolean blocked = ( blockedSize <= cellSize );

        //Step 4) output blocks / binary cell (one-at-a-time)
        TaggedAdaptivePartialBlock outTVal = new TaggedAdaptivePartialBlock();
        AdaptivePartialBlock outVal = new AdaptivePartialBlock();
        MatrixIndexes tmpIx = new MatrixIndexes();
        outTVal.setTag(index);
        outTVal.setBaseObject(outVal); //setup wrapper writables

        if( blocked ) //output binaryblock
        {
            //create intermediate blocks (format decided on avg cells per block)
            boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count/numBlocks);
            MatrixBlock tmpBlock = new MatrixBlock();

            //put values into block and output
            long cbi = -1, cbj = -1; //current block indexes
            for( int i=0; i<_count; i++ )
            {
                long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
                long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

                //output block and switch to next index pair
                if( bi != cbi || bj != cbj ) {
                    outputBlock(out, tmpIx, outTVal, tmpBlock);
                    cbi = bi;
                    cbj = bj;
                    tmpIx.setIndexes(bi, bj);
                    tmpBlock.reset(computeBlockDim(_rlen, bi, _brlen),
                        computeBlockDim(_clen, bj, _bclen), sparse);
                }

                int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
                int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
                double tmp = Double.longBitsToDouble(_buff[i][2]);
                tmpBlock.appendValue(ci, cj, tmp);
            }

            //output last block
            outputBlock(out, tmpIx, outTVal, tmpBlock);
        }
        else //output binarycell
        {
            PartialBlock tmpVal = new PartialBlock();
            outVal.set(tmpVal);
            for( int i=0; i<_count; i++ )
            {
                long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
                long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);
                int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
                int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
                double tmp = Double.longBitsToDouble(_buff[i][2]);
                tmpIx.setIndexes(bi, bj);
                tmpVal.set(ci, cj, tmp); //in outVal, in outTVal
                out.collect(tmpIx, outTVal);
            }
        }

        _count = 0;
    }

    /**
     * Sorts the buffered cells by block index and appends one newly allocated
     * matrix block per touched block to the given list. The buffer is empty
     * afterwards; an empty buffer is a no-op.
     *
     * @param outList list receiving the created (index, block) pairs
     * @throws IOException declared by the block output helper
     * @throws DMLRuntimeException declared by the block output helper
     */
    public void flushBufferToBinaryBlocks( ArrayList<IndexedMatrixValue> outList )
        throws IOException, DMLRuntimeException
    {
        if( _count == 0 )
            return;

        //Step 1) sort reblock buffer (blockwise, no in-block sorting!)
        Arrays.sort( _buff, 0 ,_count, new ReblockBufferComparator() );

        //Step 2) scan for number of created blocks
        long numBlocks = countBlocks();

        //Step 3) output blocks
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count/numBlocks);
        MatrixIndexes tmpIx = new MatrixIndexes();
        MatrixBlock tmpBlock = new MatrixBlock();

        //put values into block and output
        long cbi = -1, cbj = -1; //current block indexes
        for( int i=0; i<_count; i++ )
        {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

            //output block and switch to next index pair
            //(fresh objects per block because the list keeps references)
            if( bi != cbi || bj != cbj ) {
                outputBlock(outList, tmpIx, tmpBlock);
                cbi = bi;
                cbj = bj;
                tmpIx = new MatrixIndexes(bi, bj);
                tmpBlock = new MatrixBlock(computeBlockDim(_rlen, bi, _brlen),
                    computeBlockDim(_clen, bj, _bclen), sparse);
            }

            int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
            int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
            double tmp = Double.longBitsToDouble(_buff[i][2]);
            tmpBlock.appendValue(ci, cj, tmp);
        }

        //output last block
        outputBlock(outList, tmpIx, tmpBlock);

        _count = 0;
    }

    /**
     * Writes one cell into the next free buffer slot and advances the count.
     * The value is stored as its raw long bit pattern.
     */
    private void putCell( long r, long c, double v )
    {
        long[] slot = _buff[_count];
        slot[0] = r;
        slot[1] = c;
        slot[2] = Double.doubleToRawLongBits(v);
        _count++;
    }

    /**
     * Counts the runs of consecutive cells with equal block indexes in the
     * first {@code _count} buffer entries; on a buffer sorted by block index
     * this is the number of distinct blocks.
     */
    private long countBlocks()
    {
        long numBlocks = 0; //number of blocks in buffer
        long cbi = -1, cbj = -1; //current block indexes
        for( int i=0; i<_count; i++ )
        {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

            //switch to next block
            if( bi != cbi || bj != cbj ) {
                cbi = bi;
                cbj = bj;
                numBlocks++;
            }
        }
        return numBlocks;
    }

    /**
     * Computes the size of a block along one dimension, i.e., the full block
     * length or the remainder for the last block. The minimum is taken in
     * long arithmetic before narrowing to int, so dimensions larger than
     * Integer.MAX_VALUE do not wrap around.
     */
    private static int computeBlockDim( long len, long blockIndex, int blen )
    {
        long remaining = len - (blockIndex-1)*blen;
        return (int) Math.min(blen, remaining);
    }

    /**
     * Emits the given block under the given key to the output collector,
     * unless the key still carries -1 as row or column index (no block
     * assigned yet).
     */
    private static void outputBlock( OutputCollector<Writable, Writable> out, MatrixIndexes key, TaggedAdaptivePartialBlock value, MatrixBlock block )
        throws IOException
    {
        //skip output of unassigned blocks
        if( key.getRowIndex() == -1 || key.getColumnIndex() == -1 )
            return;

        //sort sparse rows due to blockwise buffer sort and append
        if( block.isInSparseFormat() )
            block.sortSparseRows();

        //output block
        value.getBaseObject().set(block);
        out.collect(key, value);
    }

    /**
     * Adds the given block under the given key to the output list, unless the
     * key still carries -1 as row or column index (no block assigned yet).
     */
    private static void outputBlock( ArrayList<IndexedMatrixValue> out, MatrixIndexes key, MatrixBlock value )
        throws IOException, DMLRuntimeException
    {
        //skip output of unassigned blocks
        if( key.getRowIndex() == -1 || key.getColumnIndex() == -1 )
            return;

        //sort sparse rows due to blockwise buffer sort and append
        if( value.isInSparseFormat() )
            value.sortSparseRows();

        //ensure correct representation (for in-memory blocks)
        value.examSparsity();

        //convert ultra-sparse blocks from MCSR to COO in order to
        //significantly reduce temporary memory pressure until write
        if( value.isUltraSparse() )
            value = new MatrixBlock(value, Type.COO, false);

        //output block
        out.add(new IndexedMatrixValue(key,value));
    }

    /**
     * Comparator to sort the reblock buffer by block indexes (row block index
     * first, then column block index), where we compute the block indexes
     * on-the-fly based on the given cell indexes. Cells of the same block
     * compare as equal.
     */
    private class ReblockBufferComparator implements Comparator<long[]>
    {
        @Override
        public int compare(long[] arg0, long[] arg1)
        {
            long bi0 = UtilFunctions.computeBlockIndex( arg0[0], _brlen );
            long bj0 = UtilFunctions.computeBlockIndex( arg0[1], _bclen );
            long bi1 = UtilFunctions.computeBlockIndex( arg1[0], _brlen );
            long bj1 = UtilFunctions.computeBlockIndex( arg1[1], _bclen );

            return ( bi0 < bi1 || (bi0 == bi1 && bj0 < bj1) ) ? -1 :
                (( bi0 == bi1 && bj0 == bj1)? 0 : 1);
        }
    }
}