/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.io;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlockMCSR;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
/**
 * Multi-threaded reader for matrices stored in binary block format.
 *
 * <p>One read task is created per sequence file returned by
 * {@code getSequenceFilePaths}; the tasks are executed on a fixed-size thread
 * pool and all of them write into a single, pre-allocated output
 * {@link MatrixBlock}.</p>
 */
public class ReaderBinaryBlockParallel extends ReaderBinaryBlock
{
	//degree of parallelism of this reader instance; kept per instance so that
	//constructing another reader cannot silently change the value used here
	private final int _numThreads;

	/**
	 * Creates a parallel binary block reader whose degree of parallelism is
	 * taken from {@link OptimizerUtils#getParallelBinaryReadParallelism()}.
	 *
	 * @param localFS if true, the input is read via the local file system
	 *                and the file name is prefixed with {@code file:///}
	 */
	public ReaderBinaryBlockParallel( boolean localFS )
	{
		super(localFS);
		_numThreads = OptimizerUtils.getParallelBinaryReadParallelism();
	}

	/**
	 * Reads a binary block matrix into a single in-memory matrix block.
	 *
	 * @param fname  file name of the input (file or directory of part files)
	 * @param rlen   number of rows of the overall matrix
	 * @param clen   number of columns of the overall matrix
	 * @param brlen  number of rows per block
	 * @param bclen  number of columns per block
	 * @param estnnz estimated number of non-zeros, passed to the output allocation
	 * @return the matrix block holding all blocks read from the input
	 * @throws IOException if the input is invalid or the parallel read fails
	 * @throws DMLRuntimeException declared for the called helpers
	 */
	@Override
	public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
		throws IOException, DMLRuntimeException
	{
		//allocate output matrix block (incl block allocation for parallel)
		MatrixBlock ret = createOutputMatrixBlock(rlen, clen, brlen, bclen, estnnz, true, true);

		//prepare file access
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
		Path path = new Path( (_localFS ? "file:///" : "") + fname);

		//check existence and non-empty file
		checkValidInputFile(fs, path);

		//core read
		readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _numThreads);

		//finally check if change of sparse/dense block representation required
		if( !AGGREGATE_BLOCK_NNZ )
			ret.recomputeNonZeros();
		ret.examSparsity();

		return ret;
	}

	/**
	 * Reads all sequence files below the given path in parallel into
	 * {@code dest}, sets the aggregated number of non-zeros, and sorts the
	 * sparse rows if the output is sparse and wider than one block.
	 *
	 * <p>The thread pool is always shut down before this method returns,
	 * regardless of whether the read succeeded.</p>
	 *
	 * @param path       input path (file or directory)
	 * @param job        job configuration used to open the sequence files
	 * @param fs         file system of the input
	 * @param dest       pre-allocated output matrix block
	 * @param rlen       number of rows of the overall matrix
	 * @param clen       number of columns of the overall matrix
	 * @param brlen      number of rows per block
	 * @param bclen      number of columns per block
	 * @param numThreads size of the thread pool used for reading and sorting
	 * @throws IOException if any step of the parallel read fails; the original
	 *                     failure is attached as cause
	 * @throws DMLRuntimeException declared for signature compatibility
	 */
	private static void readBinaryBlockMatrixFromHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, int numThreads )
		throws IOException, DMLRuntimeException
	{
		//set up preferred custom serialization framework for binary block format
		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
			MRJobConfiguration.addBinaryBlockSerializationFramework( job );

		try
		{
			ExecutorService pool = Executors.newFixedThreadPool(numThreads);
			try
			{
				//create read tasks for all files
				ArrayList<ReadFileTask> tasks = new ArrayList<ReadFileTask>();
				for( Path lpath : getSequenceFilePaths(fs, path) )
					tasks.add(new ReadFileTask(lpath, job, fs, dest, rlen, clen, brlen, bclen));

				//wait until all tasks have been executed
				List<Future<Long>> rt = pool.invokeAll(tasks);

				//check for exceptions and aggregate nnz
				long lnnz = 0;
				for( Future<Long> task : rt )
					lnnz += task.get();

				//post-processing (sparse blocks were appended and require a final sort)
				dest.setNonZeros( lnnz );
				if( dest.isInSparseFormat() && clen>bclen )
					sortSparseRowsParallel(dest, rlen, numThreads, pool);
			}
			finally
			{
				//release worker threads on success and on failure
				pool.shutdown();
			}
		}
		catch (InterruptedException e) {
			//preserve the interrupt status for callers further up the stack
			Thread.currentThread().interrupt();
			throw new IOException("Failed parallel read of binary block input.", e);
		}
		catch (Exception e) {
			throw new IOException("Failed parallel read of binary block input.", e);
		}
	}

	/**
	 * Task that reads one sequence file and copies all of its non-empty
	 * blocks into the shared output matrix block. The result of
	 * {@link #call()} is the sum of the non-zero counts of the copied blocks.
	 */
	private static class ReadFileTask implements Callable<Long>
	{
		private final Path _path;
		private final JobConf _job;
		private final FileSystem _fs;
		private final MatrixBlock _dest;
		private final long _rlen;
		private final long _clen;
		private final int _brlen;
		private final int _bclen;

		public ReadFileTask(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen)
		{
			_path = path;
			_fs = fs;
			_job = job;
			_dest = dest;
			_rlen = rlen;
			_clen = clen;
			_brlen = brlen;
			_bclen = bclen;
		}

		/**
		 * Reads all key/value pairs of the sequence file and copies every
		 * non-empty block to its position in the output.
		 *
		 * @return aggregated number of non-zeros of all copied blocks
		 * @throws Exception if reading fails or a block lies outside the
		 *                   overall matrix dimensions
		 */
		@Override
		@SuppressWarnings({ "deprecation" })
		public Long call() throws Exception
		{
			boolean sparse = _dest.isInSparseFormat();
			MatrixIndexes key = new MatrixIndexes();
			MatrixBlock value = new MatrixBlock();
			long lnnz = 0; //aggregate block nnz

			//directly read from sequence files (individual partfiles)
			SequenceFile.Reader reader = new SequenceFile.Reader(_fs,_path,_job);

			try
			{
				//note: next(key, value) does not yet exploit the given serialization classes, record reader does but is generally slower.
				while( reader.next(key, value) )
				{
					//empty block filter (skip entire block)
					if( value.isEmptyBlock(false) )
						continue;

					//offsets are computed in long arithmetic so that an invalid
					//block index cannot wrap around into a seemingly valid offset
					long rowOffset = (key.getRowIndex() - 1L) * _brlen;
					long colOffset = (key.getColumnIndex() - 1L) * _bclen;
					int rows = value.getNumRows();
					int cols = value.getNumColumns();

					//bound check per block
					if( rowOffset < 0 || rowOffset + rows > _rlen || colOffset < 0 || colOffset + cols > _clen )
					{
						throw new IOException("Matrix block ["+(rowOffset+1)+":"+(rowOffset+rows)+","+(colOffset+1)+":"+(colOffset+cols)+"] " +
							"out of overall matrix range [1:"+_rlen+",1:"+_clen+"].");
					}

					int row_offset = (int) rowOffset;
					int col_offset = (int) colOffset;

					//copy block to result
					if( sparse )
					{
						//note: append requires final sort
						if( cols < _clen ) {
							//sparse requires lock, when matrix is wider than one block
							//(fine-grained locking of block rows instead of the entire matrix)
							//NOTE: fine-grained locking depends on MCSR SparseRow objects
							SparseBlock sblock = _dest.getSparseBlock();
							Object lock = (sblock instanceof SparseBlockMCSR) ? sblock.get(row_offset) : null;
							if( lock == null )
								lock = _dest; //fallback: lock the entire output
							synchronized( lock ){
								_dest.appendToSparse(value, row_offset, col_offset);
							}
						}
						else { //quickpath (no synchronization)
							_dest.appendToSparse(value, row_offset, col_offset);
						}
					}
					else
					{
						_dest.copy( row_offset, row_offset+rows-1,
							col_offset, col_offset+cols-1, value, false );
					}

					//aggregate nnz
					lnnz += value.getNonZeros();
				}
			}
			finally
			{
				IOUtilFunctions.closeSilently(reader);
			}

			return lnnz;
		}
	}
}