/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.io;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
/**
 * Matrix reader for the binary cell format: one or more Hadoop sequence
 * files whose records are (MatrixIndexes, MatrixCell) pairs, each carrying
 * the 1-based position and the value of a single matrix cell.
 */
public class ReaderBinaryCell extends MatrixReader
{
    /**
     * Reads a complete matrix in binary cell format into a single matrix block.
     *
     * @param fname  name of the file (or directory of part files) to read
     * @param rlen   number of rows of the overall matrix
     * @param clen   number of columns of the overall matrix
     * @param brlen  rows per block; only forwarded to the core read, which does not use it
     * @param bclen  columns per block; only forwarded to the core read, which does not use it
     * @param estnnz non-zero estimate forwarded to the output block allocation
     * @return the populated matrix block
     * @throws IOException if the input is invalid or a cell cannot be read
     * @throws DMLRuntimeException declared by the overridden reader contract
     */
    @Override
    public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException
    {
        //allocate output matrix block (a single block spanning the entire matrix)
        MatrixBlock ret = createOutputMatrixBlock(rlen, clen, (int)rlen, (int)clen, estnnz, true, false);

        //prepare file access
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get(job);
        Path path = new Path( fname );

        //check existence and non-empty file
        checkValidInputFile(fs, path);

        //core read
        readBinaryCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

        //finally check if change of sparse/dense block representation required
        //(nnz maintained via append during read for both dense/sparse)
        ret.examSparsity();

        return ret;
    }

    /**
     * Reading binary cell matrices from an input stream is not supported.
     *
     * @throws DMLRuntimeException always
     */
    @Override
    public MatrixBlock readMatrixFromInputStream(InputStream is, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException
    {
        throw new DMLRuntimeException("Not implemented yet.");
    }

    /**
     * Appends every cell found in the sequence file(s) under {@code path}
     * to {@code dest}, converting the stored 1-based indexes to 0-based
     * positions. If {@code dest} is in sparse format, its sparse rows are
     * sorted once all files have been read.
     *
     * Cell positions are not validated while reading; instead, any failure
     * is diagnosed after the fact against the last cell position read.
     *
     * @param path  file or directory of part files to read
     * @param job   job configuration used to open the sequence files
     * @param fs    file system holding {@code path}
     * @param dest  pre-allocated output block that receives the cells
     * @param rlen  number of rows of the overall matrix (used for error diagnosis)
     * @param clen  number of columns of the overall matrix (used for error diagnosis)
     * @param brlen unused
     * @param bclen unused
     * @throws IOException if reading fails or a cell lies outside [1:rlen,1:clen];
     *                     the underlying exception is attached as the cause
     */
    @SuppressWarnings("deprecation")
    private void readBinaryCellMatrixFromHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen )
        throws IOException
    {
        boolean sparse = dest.isInSparseFormat();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();

        //0-based position of the most recently read cell, kept for post-mortem diagnosis
        int row = -1;
        int col = -1;
        //distinguishes "no cell read yet" from a cell that really has index 0
        boolean cellRead = false;

        try
        {
            for( Path lpath : getSequenceFilePaths(fs,path) ) //1..N files
            {
                //directly read from sequence files (individual partfiles)
                SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);

                try
                {
                    //append is used for both dense and sparse targets
                    while( reader.next(key, value) )
                    {
                        row = (int)key.getRowIndex()-1;
                        col = (int)key.getColumnIndex()-1;
                        cellRead = true;
                        dest.appendValue(row, col, value.getValue());
                    }
                }
                finally
                {
                    IOUtilFunctions.closeSilently(reader);
                }
            }

            if( sparse )
                dest.sortSparseRows();
        }
        catch(Exception ex)
        {
            //post-mortem error handling and bounds checking; only blame the
            //cell position if a cell was actually read, otherwise failures such
            //as being unable to open the first file would be misreported
            boolean outOfRange = cellRead
                && (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen);

            if( outOfRange )
            {
                throw new IOException("Matrix cell ["+(row+1)+","+(col+1)+"] " +
                    "out of overall matrix range [1:"+rlen+",1:"+clen+"].", ex);
            }
            else
            {
                throw new IOException( "Unable to read matrix in binary cell format.", ex );
            }
        }
    }
}