/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.runtime.io;
import java.io.EOFException;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.util.MapReduceTool;
/**
 * Common base class of all format-specific matrix readers. Each concrete reader has to
 * implement the basic read functionality and may offer additional custom functionality.
 * Non-default parameters (e.g., CSV read properties) are expected to be handed in via
 * custom constructors. There is also a factory for creating format-specific readers.
 */
public abstract class MatrixReader
{
    //internal configuration
    protected static final boolean AGGREGATE_BLOCK_NNZ = true;

    /**
     * Reads the matrix stored under the given file name into a matrix block.
     *
     * @param fname  name of the input file
     * @param rlen   number of rows (presumably of the overall matrix; confirm with implementations)
     * @param clen   number of columns (presumably of the overall matrix; confirm with implementations)
     * @param brlen  presumably the number of rows per block; confirm with implementations
     * @param bclen  presumably the number of columns per block; confirm with implementations
     * @param estnnz estimated number of non-zero values
     * @return the matrix block that was read
     * @throws IOException
     * @throws DMLRuntimeException
     */
    public abstract MatrixBlock readMatrixFromHDFS( String fname, long rlen, long clen, int brlen, int bclen, long estnnz )
        throws IOException, DMLRuntimeException;

    /**
     * Resolves the paths that make up a (potentially multi-part) input file.
     * For a directory, all directly contained entries are returned except those
     * whose name starts with an underscore; for anything else, the given path
     * itself is returned as the single element.
     *
     * @param fs   file system used to inspect the path
     * @param file path of a single file or of a directory of part files
     * @return array of paths to read
     * @throws IOException
     */
    public static Path[] getSequenceFilePaths( FileSystem fs, Path file )
        throws IOException
    {
        //not a directory: the path itself is the only part
        if( !fs.isDirectory(file) ) {
            return new Path[]{ file };
        }

        //directory: collect all entries in listing order
        LinkedList<Path> parts = new LinkedList<Path>();
        for( FileStatus entry : fs.listStatus(file) ) {
            Path part = entry.getPath();
            if( part.getName().startsWith("_") ) { //skip internal files
                continue;
            }
            parts.add(part);
        }
        return parts.toArray(new Path[parts.size()]);
    }

    /**
     * Creates the output matrix block for a read of the given dimensions.
     *
     * NOTE: mallocDense controls if a dense output block is fully allocated up front,
     * which can be redundant for a binary block read of a single block. Similarly,
     * mallocSparse controls if the sparse rows of a sparse output block are allocated.
     *
     * @param rlen         number of rows
     * @param clen         number of columns
     * @param estnnz       estimated number of non-zero values
     * @param mallocDense  if true and the dense format is chosen, allocate the dense block
     * @param mallocSparse if true and the sparse format is chosen, allocate the sparse rows
     * @return the prepared output matrix block
     * @throws IOException
     * @throws DMLRuntimeException if the dimensions are not valid CP dimensions
     */
    protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, long estnnz, boolean mallocDense, boolean mallocSparse )
        throws IOException, DMLRuntimeException
    {
        //check input dimension
        if( !OptimizerUtils.isValidCPDimensions(rlen, clen) ) {
            throw new DMLRuntimeException("Matrix dimensions too large for CP runtime: "+rlen+" x "+clen);
        }

        final int rows = (int) rlen;
        final int cols = (int) clen;

        //determine target representation (sparse/dense)
        final boolean sparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, estnnz);

        //prepare result matrix block, allocating only what was requested for the chosen format
        MatrixBlock ret = new MatrixBlock(rows, cols, sparse, estnnz);
        if( sparse ) {
            if( mallocSparse ) {
                ret.allocateSparseRowsBlock();
            }
        }
        else if( mallocDense ) {
            ret.allocateDenseBlockUnsafe(rows, cols);
        }

        return ret;
    }

    /**
     * Validates that the given input path exists and is not empty.
     *
     * @param fs   file system used to check the path
     * @param path path of the input file
     * @throws IOException if the path does not exist; an EOFException if the file is empty
     */
    protected static void checkValidInputFile(FileSystem fs, Path path)
        throws IOException
    {
        //check non-existing file
        if( !fs.exists(path) ) {
            throw new IOException("File "+path.toString()+" does not exist on HDFS/LFS.");
        }

        //check for empty file
        final String fname = path.toString();
        if( MapReduceTool.isFileEmpty( fs, fname ) ) {
            throw new EOFException("Empty input file "+ fname +".");
        }
    }
}