/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.File;
import java.util.ArrayList;
import org.apache.hadoop.fs.Path;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.io.MatrixReaderFactory;
import org.apache.sysml.runtime.io.ReaderBinaryBlock;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.util.DataConverter;
public class DistributedCacheInput
{
//internal partitioning parameter (threshold and partition size)
public static final long PARTITION_SIZE = 4000000; //32MB
//public static final String PARTITION_SUFFIX = "_dp";
//meta data of cache input
private Path _localFilePath = null;
private long _rlen = -1;
private long _clen = -1;
private int _brlen = -1;
private int _bclen = -1;
private PDataPartitionFormat _pformat = null;
//data cached input
private IndexedMatrixValue[][] dataBlocks = null;
public DistributedCacheInput(Path p, long rows, long cols, int brlen, int bclen, PDataPartitionFormat pformat)
{
_localFilePath = p;
_rlen = rows;
_clen = cols;
_brlen = brlen;
_bclen = bclen;
_pformat = pformat;
}
public long getNumRows() {
return _rlen;
}
public long getNumCols() {
return _clen;
}
public int getNumRowsPerBlock(){
return _brlen;
}
public int getNumColsPerBlock(){
return _bclen;
}
public void reset()
{
_localFilePath = null;
_rlen = -1;
_clen = -1;
_brlen = -1;
_bclen = -1;
_pformat = null;
}
public IndexedMatrixValue getDataBlock(int rowBlockIndex, int colBlockIndex)
throws DMLRuntimeException
{
//probe missing block (read on-demand)
if( dataBlocks==null || dataBlocks[rowBlockIndex-1][colBlockIndex-1]==null )
readDataBlocks( rowBlockIndex, colBlockIndex );
//return read or existing block
return dataBlocks[rowBlockIndex-1][colBlockIndex-1];
}
public double[] getRowVectorArray()
throws DMLRuntimeException
{
double[] ret = new double[(int)_clen];
for( int j=0; j<_clen; j+=_bclen ) {
MatrixBlock mb = (MatrixBlock) getDataBlock(1, (int)Math.ceil((double)(j+1)/_bclen)).getValue();
double[] mbtmp = DataConverter.convertToDoubleVector(mb);
System.arraycopy(mbtmp, 0, ret, j, mbtmp.length);
}
return ret;
}
public double[] getColumnVectorArray()
throws DMLRuntimeException
{
double[] ret = new double[(int)_rlen];
for( int j=0; j<_rlen; j+=_brlen ) {
MatrixBlock mb = (MatrixBlock) getDataBlock((int)Math.ceil((double)(j+1)/_brlen),1).getValue();
double[] mbtmp = DataConverter.convertToDoubleVector(mb);
System.arraycopy(mbtmp, 0, ret, j, mbtmp.length);
}
return ret;
}
private void readDataBlocks( int rowBlockIndex, int colBlockIndex )
throws DMLRuntimeException
{
//get filename for rowblock/colblock
String fname = _localFilePath.toString();
if( isPartitioned() )
fname = getPartitionFileName(rowBlockIndex, colBlockIndex);
//read matrix partition (or entire vector)
try
{
ReaderBinaryBlock reader = (ReaderBinaryBlock) MatrixReaderFactory.createMatrixReader(InputInfo.BinaryBlockInputInfo);
reader.setLocalFS( !MRBaseForCommonInstructions.isJobLocal );
ArrayList<IndexedMatrixValue> tmp = reader.readIndexedMatrixBlocksFromHDFS(fname, _rlen, _clen, _brlen, _bclen);
int rowBlocks = (int) Math.ceil(_rlen / (double) _brlen);
int colBlocks = (int) Math.ceil(_clen / (double) _bclen);
if( dataBlocks==null )
dataBlocks = new IndexedMatrixValue[rowBlocks][colBlocks];
for (IndexedMatrixValue val : tmp) {
MatrixIndexes idx = val.getIndexes();
dataBlocks[(int) idx.getRowIndex() - 1][(int) idx.getColumnIndex() - 1] = val;
}
}
catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
}
private boolean isPartitioned()
{
return (_pformat != PDataPartitionFormat.NONE);
}
private String getPartitionFileName( int rowBlockIndex, int colBlockIndex )
throws DMLRuntimeException
{
long partition = -1;
switch( _pformat )
{
case ROW_BLOCK_WISE_N:
{
long numRowBlocks = (long)Math.ceil(((double)PARTITION_SIZE)/_clen/_brlen);
partition = (rowBlockIndex-1)/numRowBlocks + 1;
break;
}
case COLUMN_BLOCK_WISE_N:
{
long numColBlocks = (long)Math.ceil(((double)PARTITION_SIZE)/_rlen/_bclen);
partition = (colBlockIndex-1)/numColBlocks + 1;
break;
}
default:
throw new DMLRuntimeException("Unsupported partition format for distributed cache input: "+_pformat);
}
return _localFilePath.toString() + File.separator + partition;
}
}