/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.cpfile;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map.Entry;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.util.Cell;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.controlprogram.parfor.util.StagingFileUtils;
import org.apache.sysml.runtime.functionobjects.ParameterizedBuiltin;
import org.apache.sysml.runtime.functionobjects.ValueFunction;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.io.MatrixWriter;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.SimpleOperator;
import org.apache.sysml.runtime.util.FastStringTokenizer;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
/**
* File-based (out-of-core) realization of remove empty for robustness because there is no
* parallel version due to data-dependent row- and column dependencies.
*
*/
public class ParameterizedBuiltinCPFileInstruction extends ParameterizedBuiltinCPInstruction
{
public ParameterizedBuiltinCPFileInstruction(Operator op, HashMap<String, String> paramsMap, CPOperand out, String opcode, String istr)
{
super(op, paramsMap, out, opcode, istr);
}
public static ParameterizedBuiltinCPFileInstruction parseInstruction( String str )
throws DMLRuntimeException
{
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
// first part is always the opcode
String opcode = parts[0];
// last part is always the output
CPOperand out = new CPOperand( parts[parts.length-1] );
// process remaining parts and build a hash map
HashMap<String,String> paramsMap = constructParameterMap(parts);
// determine the appropriate value function
ValueFunction func = null;
if ( opcode.equalsIgnoreCase("rmempty") ) {
func = ParameterizedBuiltin.getParameterizedBuiltinFnObject(opcode);
return new ParameterizedBuiltinCPFileInstruction(new SimpleOperator(func), paramsMap, out, opcode, str);
}
else {
throw new DMLRuntimeException("Unknown opcode (" + opcode + ") for ParameterizedBuiltin Instruction.");
}
}
@Override
public void processInstruction(ExecutionContext ec)
throws DMLRuntimeException
{
String opcode = getOpcode();
if ( opcode.equalsIgnoreCase("rmempty") )
{
// get inputs
MatrixObject src = ec.getMatrixObject( params.get("target") );
MatrixObject out = ec.getMatrixObject( output.getName() );
String margin = params.get("margin");
// export input matrix (if necessary)
src.exportData();
//core execution
RemoveEmpty rm = new RemoveEmpty( margin, src, out );
out = rm.execute();
//put output
ec.setVariable(output.getName(), out);
}
else {
throw new DMLRuntimeException("Unknown opcode : " + opcode);
}
}
/**
* Remove empty rows as a inner class in order to allow testing independent of the
* overall SystemML instruction framework.
*
*/
public static class RemoveEmpty
{
private String _margin = null;
private MatrixObject _src = null;
private MatrixObject _out = null;
public RemoveEmpty( String margin, MatrixObject src, MatrixObject out )
{
_margin = margin;
_src = src;
_out = out;
}
public MatrixObject execute()
throws DMLRuntimeException
{
//Timing time = new Timing();
//time.start();
//initial setup
String fnameOld = _src.getFileName();
String fnameNew = _out.getFileName();
InputInfo ii = ((MatrixFormatMetaData)_src.getMetaData()).getInputInfo();
MatrixCharacteristics mc = _src.getMatrixCharacteristics();
String stagingDir = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_WORK);
LocalFileUtils.createLocalFileIfNotExist(stagingDir);
long ret = -1;
try
{
boolean diagBlocks = false;
//Phase 1: write file to staging
if( ii == InputInfo.TextCellInputInfo )
createTextCellStagingFile( fnameOld, stagingDir );
else if( ii == InputInfo.BinaryCellInputInfo )
createBinaryCellStagingFile( fnameOld, stagingDir );
else if( ii == InputInfo.BinaryBlockInputInfo )
diagBlocks = createBinaryBlockStagingFile( fnameOld, stagingDir );
//System.out.println("Executed phase 1 in "+time.stop());
//Phase 2: scan empty rows/cols
if( diagBlocks )
ret = createKeyMappingDiag(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);
else
ret = createKeyMapping(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);
//System.out.println("Executed phase 2 in "+time.stop());
//Phase 3: create output files
MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
if( ii == InputInfo.TextCellInputInfo
|| ii == InputInfo.BinaryCellInputInfo )
{
createCellResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
}
else if( ii == InputInfo.BinaryBlockInputInfo )
{
if( diagBlocks )
createBlockResultFileDiag( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
else
createBlockResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
}
//System.out.println("Executed phase 3 in "+time.stop());
}
catch( IOException ioe )
{
throw new DMLRuntimeException( ioe );
}
//final cleanup
LocalFileUtils.cleanupWorkingDirectory(stagingDir);
//create and return new output object
if( _margin.equals("rows") )
return createNewOutputObject(_src, _out, ret, mc.getCols());
else
return createNewOutputObject(_src, _out, mc.getRows(), ret );
}
private MatrixObject createNewOutputObject( MatrixObject src, MatrixObject out, long rows, long cols )
throws DMLRuntimeException
{
String varName = out.getVarName();
String fName = out.getFileName();
ValueType vt = src.getValueType();
MatrixFormatMetaData metadata = (MatrixFormatMetaData) src.getMetaData();
MatrixObject moNew = new MatrixObject( vt, fName );
moNew.setVarName( varName );
moNew.setDataType( DataType.MATRIX );
//handle empty output block (ensure valid dimensions)
if( rows==0 || cols ==0 ){
rows = Math.max(rows, 1);
cols = Math.max(cols, 1);
try {
moNew.acquireModify(new MatrixBlock((int)rows, (int) cols, true));
moNew.release();
}
catch (CacheException e) {
throw new DMLRuntimeException(e);
}
}
//create deep copy of metadata obj
MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
OutputInfo oiOld = metadata.getOutputInfo();
InputInfo iiOld = metadata.getInputInfo();
MatrixCharacteristics mc = new MatrixCharacteristics( rows, cols, mcOld.getRowsPerBlock(),
mcOld.getColsPerBlock(), mcOld.getNonZeros());
MatrixFormatMetaData meta = new MatrixFormatMetaData(mc,oiOld,iiOld);
moNew.setMetaData( meta );
return moNew;
}
public void createTextCellStagingFile( String fnameOld, String stagingDir )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameOld);
FileSystem fs = FileSystem.get(job);
if( !fs.exists(path) )
throw new IOException("File "+fnameOld+" does not exist on HDFS.");
FileInputFormat.addInputPath(job, path);
TextInputFormat informat = new TextInputFormat();
informat.configure(job);
InputSplit[] splits = informat.getSplits(job, 1);
LinkedList<Cell> buffer = new LinkedList<Cell>();
LongWritable key = new LongWritable();
Text value = new Text();
FastStringTokenizer st = new FastStringTokenizer(' ');
for(InputSplit split: splits)
{
RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
try
{
while( reader.next(key, value) )
{
st.reset( value.toString() ); //reset tokenizer
long row = st.nextLong();
long col = st.nextLong();
double lvalue = st.nextDouble();
buffer.add(new Cell(row,col,lvalue));
if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE )
{
appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
buffer.clear();
}
}
if( !buffer.isEmpty() )
{
appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
buffer.clear();
}
}
finally {
IOUtilFunctions.closeSilently(reader);
}
}
}
@SuppressWarnings("deprecation")
public void createBinaryCellStagingFile( String fnameOld, String stagingDir )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameOld);
FileSystem fs = FileSystem.get(job);
if( !fs.exists(path) )
throw new IOException("File "+fnameOld+" does not exist on HDFS.");
LinkedList<Cell> buffer = new LinkedList<Cell>();
MatrixIndexes key = new MatrixIndexes();
MatrixCell value = new MatrixCell();
for(Path lpath: MatrixReader.getSequenceFilePaths(fs, path))
{
SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);
try
{
while(reader.next(key, value))
{
long row = key.getRowIndex();
long col = key.getColumnIndex();
double lvalue = value.getValue();
buffer.add(new Cell(row,col,lvalue));
if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE )
{
appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
buffer.clear();
}
}
if( !buffer.isEmpty() )
{
appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
buffer.clear();
}
}
finally {
IOUtilFunctions.closeSilently(reader);
}
}
}
/**
* Creates a binary block staging file and returns if the input matrix is a diag,
* because diag is the primary usecase and there is lots of optimization potential.
*
* @param fnameOld old filename
* @param stagingDir staging directory
* @return true if diag
* @throws IOException if IOException occurs
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
@SuppressWarnings("deprecation")
public boolean createBinaryBlockStagingFile( String fnameOld, String stagingDir )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameOld);
FileSystem fs = FileSystem.get(job);
if( !fs.exists(path) )
throw new IOException("File "+fnameOld+" does not exist on HDFS.");
MatrixIndexes key = new MatrixIndexes();
MatrixBlock value = new MatrixBlock();
boolean diagBlocks = true;
for(Path lpath : MatrixReader.getSequenceFilePaths(fs, path))
{
SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);
try
{
while( reader.next(key, value) )
{
if( !value.isEmptyBlock() ) //skip empty blocks (important for diag)
{
String fname = stagingDir +"/"+key.getRowIndex()+"_"+key.getColumnIndex();
LocalFileUtils.writeMatrixBlockToLocal(fname, value);
diagBlocks &= (key.getRowIndex()==key.getColumnIndex());
}
}
}
finally {
IOUtilFunctions.closeSilently(reader);
}
}
return diagBlocks;
}
private void appendCellBufferToStagingArea( String dir, LinkedList<Cell> buffer, int brlen, int bclen )
throws DMLRuntimeException, IOException
{
HashMap<String,LinkedList<Cell>> sortedBuffer = new HashMap<String,LinkedList<Cell>>();
//sort cells in buffer wrt key
String key = null;
for( Cell c : buffer )
{
key = (c.getRow()/brlen+1) +"_"+(c.getCol()/bclen+1);
if( !sortedBuffer.containsKey(key) )
sortedBuffer.put(key, new LinkedList<Cell>());
sortedBuffer.get(key).addLast(c);
}
//write lists of cells to local files
for( Entry<String,LinkedList<Cell>> e : sortedBuffer.entrySet() )
{
String pfname = dir + "/" + e.getKey();
StagingFileUtils.writeCellListToLocal(pfname, e.getValue());
}
}
private long createKeyMapping( String stagingDir, long rlen, long clen, int brlen, int bclen, InputInfo ii)
throws FileNotFoundException, IOException, DMLRuntimeException
{
String metaOut = stagingDir+"/meta";
long len = 0;
long lastKey = 0;
if(_margin.equals("rows"))
{
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
boolean[] flags = new boolean[brlen];
for( int k=0; k<brlen; k++ )
flags[k] = true;
//scan for empty rows
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
if( ii == InputInfo.BinaryBlockInputInfo ){
if( !LocalFileUtils.isExisting(fname) )
continue;
MatrixBlock buffer = LocalFileUtils.readMatrixBlockFromLocal(fname);
for( int i=0; i<buffer.getNumRows(); i++ )
for( int j=0; j<buffer.getNumColumns(); j++ )
{
double lvalue = buffer.quickGetValue(i, j);
if( lvalue != 0 )
flags[ i ] = false;
}
}
else{
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
}
}
//create and append key mapping
LinkedList<long[]> keyMapping = new LinkedList<long[]>();
for( int i = 0; i<flags.length; i++ )
if( !flags[i] )
keyMapping.add(new long[]{blockRow*brlen+i, lastKey++});
len += keyMapping.size();
StagingFileUtils.writeKeyMappingToLocal(metaOut, keyMapping.toArray(new long[0][0]));
}
}
else
{
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
boolean[] flags = new boolean[bclen];
for( int k=0; k<bclen; k++ )
flags[k] = true;
//scan for empty rows
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
if( ii == InputInfo.BinaryBlockInputInfo ){
if( !LocalFileUtils.isExisting(fname) )
continue;
MatrixBlock buffer = LocalFileUtils.readMatrixBlockFromLocal(fname);
for( int i=0; i<buffer.getNumRows(); i++ )
for( int j=0; j<buffer.getNumColumns(); j++ )
{
double lvalue = buffer.quickGetValue(i, j);
if( lvalue != 0 )
flags[ j ] = false;
}
}
else{
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
}
}
//create and append key mapping
LinkedList<long[]> keyMapping = new LinkedList<long[]>();
for( int i = 0; i<flags.length; i++ )
if( !flags[i] )
keyMapping.add(new long[]{blockCol*bclen+i, lastKey++});
len += keyMapping.size();
StagingFileUtils.writeKeyMappingToLocal(metaOut, keyMapping.toArray(new long[0][0]));
}
}
//final validation (matrices with dimensions 0x0 not allowed)
if( len <= 0 )
throw new DMLRuntimeException("Matrices with dimensions [0,0] not supported.");
return len;
}
private long createKeyMappingDiag( String stagingDir, long rlen, long clen, int brlen, int bclen, InputInfo ii)
throws FileNotFoundException, IOException, DMLRuntimeException
{
String metaOut = stagingDir+"/meta";
long len = 0;
long lastKey = 0;
if(_margin.equals("rows"))
{
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
boolean[] flags = new boolean[brlen];
for( int k=0; k<brlen; k++ )
flags[k] = true;
//scan for empty rows
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockRow+1);
if( ii == InputInfo.BinaryBlockInputInfo ){
if( !LocalFileUtils.isExisting(fname) )
continue;
MatrixBlock buffer = LocalFileUtils.readMatrixBlockFromLocal(fname);
for( int i=0; i<buffer.getNumRows(); i++ )
for( int j=0; j<buffer.getNumColumns(); j++ )
{
double lvalue = buffer.quickGetValue(i, j);
if( lvalue != 0 )
flags[ i ] = false;
}
}
else{
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
}
//create and append key mapping
LinkedList<long[]> keyMapping = new LinkedList<long[]>();
for( int i = 0; i<flags.length; i++ )
if( !flags[i] )
keyMapping.add(new long[]{blockRow*brlen+i, lastKey++});
len += keyMapping.size();
StagingFileUtils.writeKeyMappingToLocal(metaOut, keyMapping.toArray(new long[0][0]));
}
}
else
{
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
boolean[] flags = new boolean[bclen];
for( int k=0; k<bclen; k++ )
flags[k] = true;
//scan for empty rows
String fname = stagingDir+"/"+(blockCol+1)+"_"+(blockCol+1);
if( ii == InputInfo.BinaryBlockInputInfo ){
if( !LocalFileUtils.isExisting(fname) )
continue;
MatrixBlock buffer = LocalFileUtils.readMatrixBlockFromLocal(fname);
for( int i=0; i<buffer.getNumRows(); i++ )
for( int j=0; j<buffer.getNumColumns(); j++ )
{
double lvalue = buffer.quickGetValue(i, j);
if( lvalue != 0 )
flags[ j ] = false;
}
}
else{
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
}
//create and append key mapping
LinkedList<long[]> keyMapping = new LinkedList<long[]>();
for( int i = 0; i<flags.length; i++ )
if( !flags[i] )
keyMapping.add(new long[]{blockCol*bclen+i, lastKey++});
len += keyMapping.size();
StagingFileUtils.writeKeyMappingToLocal(metaOut, keyMapping.toArray(new long[0][0]));
}
}
//final validation (matrices with dimensions 0x0 not allowed)
if( len <= 0 )
throw new DMLRuntimeException("Matrices with dimensions [0,0] not supported.");
return len;
}
@SuppressWarnings("deprecation")
public void createCellResultFile( String fnameNew, String stagingDir, long rlen, long clen, int brlen, int bclen, InputInfo ii )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = FileSystem.get(job);
String metaOut = stagingDir+"/meta";
//prepare output
BufferedWriter twriter = null;
SequenceFile.Writer bwriter = null;
if( ii == InputInfo.TextCellInputInfo )
twriter = new BufferedWriter(new OutputStreamWriter(fs.create(path,true)));
else if( ii == InputInfo.BinaryCellInputInfo )
bwriter = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
else
throw new DMLRuntimeException("Unsupported cell input info: "+InputInfo.inputInfoToString(ii));
StringBuilder sb = new StringBuilder();
MatrixIndexes key = new MatrixIndexes();
MatrixCell value = new MatrixCell();
HashMap<Integer,HashMap<Long,Long>> keyMap = new HashMap<Integer, HashMap<Long,Long>>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
try
{
if( _margin.equals("rows") )
{
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockRow, brlen);
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
if( ii == InputInfo.TextCellInputInfo )
for( Cell c : buffer )
{
sb.append(keyMap.get(blockRow).get(c.getRow()-1)+1);
sb.append(' ');
sb.append(c.getCol());
sb.append(' ');
sb.append(c.getValue());
sb.append('\n');
twriter.write( sb.toString() );
sb.setLength(0);
}
else if( ii == InputInfo.BinaryCellInputInfo )
for( Cell c : buffer )
{
key.setIndexes(keyMap.get(blockRow).get(c.getRow()-1)+1, c.getCol());
value.setValue(c.getValue());
bwriter.append(key, value);
}
}
keyMap.remove(blockRow);
}
}
else
{
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockCol, bclen);
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
LinkedList<Cell> buffer = StagingFileUtils.readCellListFromLocal(fname);
if( ii == InputInfo.TextCellInputInfo )
for( Cell c : buffer )
{
sb.append(c.getRow());
sb.append(' ');
sb.append(keyMap.get(blockCol).get(c.getCol()-1)+1);
sb.append(' ');
sb.append(c.getValue());
sb.append('\n');
twriter.write( sb.toString() );
sb.setLength(0);
}
else if( ii == InputInfo.BinaryCellInputInfo )
for( Cell c : buffer )
{
key.setIndexes(c.getRow(), keyMap.get(blockCol).get(c.getCol()-1)+1);
value.setValue(c.getValue());
bwriter.append(key, value);
}
}
keyMap.remove(blockCol);
}
}
//Note: no need to handle empty result
}
finally {
IOUtilFunctions.closeSilently(twriter);
IOUtilFunctions.closeSilently(bwriter);
}
}
@SuppressWarnings("deprecation")
public void createBlockResultFile( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = FileSystem.get(job);
String metaOut = stagingDir+"/meta";
//prepare output
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
MatrixIndexes key = new MatrixIndexes();
try
{
if( _margin.equals("rows") )
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
HashMap<Integer,HashMap<Long,Long>> keyMap = new HashMap<Integer, HashMap<Long,Long>>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
int blockRowOut = 0;
int currentSize = -1;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0 )
{
int maxRow = currentSize;
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
int rowPos = 0;
int blockRow = Collections.min(keyMap.keySet());
for( ; blockRow < (int)Math.ceil(rlen/(double)brlen) && rowPos<brlen ; blockRow++)
{
if( keyMap.containsKey(blockRow) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
if( LocalFileUtils.isExisting(fname) )
{
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap<Long,Long> lkeyMap = keyMap.get(blockRow);
long row_offset = (long)blockRow*brlen;
for( int i=0; i<tmp.getNumRows(); i++ )
if( lkeyMap.containsKey(row_offset+i) ) {
//copy row
for( int j=0; j<tmp.getNumColumns(); j++ ) {
double lvalue = tmp.quickGetValue(i, j);
if( lvalue != 0 )
block.quickSetValue(rowPos, j, lvalue);
}
rowPos++;
}
}
else
{
HashMap<Long,Long> lkeyMap = keyMap.get(blockRow);
rowPos+=lkeyMap.size();
}
}
keyMap.remove(blockRow);
}
key.setIndexes(blockRowOut+1, blockCol+1);
writer.append(key, block);
blockRowOut++;
}
IOUtilFunctions.closeSilently(fkeyMap);
}
}
else
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, newlen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
HashMap<Integer,HashMap<Long,Long>> keyMap = new HashMap<Integer, HashMap<Long,Long>>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int maxRow = (int)(((long)blockRow*brlen + brlen < rlen) ? brlen : rlen - (long)blockRow*brlen);
int blockColOut = 0;
int currentSize = -1;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0 )
{
int maxCol = currentSize;
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
int colPos = 0;
int blockCol = Collections.min(keyMap.keySet());
for( ; blockCol < (int)Math.ceil(clen/(double)bclen) && colPos<bclen ; blockCol++)
{
if( keyMap.containsKey(blockCol) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
if( LocalFileUtils.isExisting(fname) )
{
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap<Long,Long> lkeyMap = keyMap.get(blockCol);
long col_offset = blockCol*bclen;
for( int j=0; j<tmp.getNumColumns(); j++ )
if( lkeyMap.containsKey(col_offset+j) ) {
//copy column
for( int i=0; i<tmp.getNumRows(); i++ ){
double lvalue = tmp.quickGetValue(i, j);
if( lvalue != 0 )
block.quickSetValue(i, colPos, lvalue);
}
colPos++;
}
}
else
{
HashMap<Long,Long> lkeyMap = keyMap.get(blockCol);
colPos+=lkeyMap.size();
}
}
keyMap.remove(blockCol);
}
key.setIndexes(blockRow+1, blockColOut+1);
writer.append(key, block);
blockColOut++;
}
IOUtilFunctions.closeSilently(fkeyMap);
}
}
//Note: no handling of empty matrices necessary
}
finally {
IOUtilFunctions.closeSilently(writer);
}
}
@SuppressWarnings("deprecation")
public void createBlockResultFileDiag( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = FileSystem.get(job);
String metaOut = stagingDir+"/meta";
//prepare output
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
MatrixIndexes key = new MatrixIndexes();
HashSet<Long> writtenBlocks = new HashSet<Long>();
try
{
if( _margin.equals("rows") )
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
HashMap<Integer,HashMap<Long,Long>> keyMap = new HashMap<Integer, HashMap<Long,Long>>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int currentSize = -1;
int blockRowOut = 0;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0 )
{
int rowPos = 0;
int blockRow = Collections.min(keyMap.keySet());
int maxRow = currentSize;
for( ; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
int blockCol = blockRow; // for diag known to be equivalent
int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
if( keyMap.containsKey(blockRow) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap<Long,Long> lkeyMap = keyMap.get(blockRow);
long row_offset = blockRow*brlen;
for( int i=0; i<tmp.getNumRows(); i++ )
if( lkeyMap.containsKey(row_offset+i) ) {
//copy row
for( int j=0; j<tmp.getNumColumns(); j++ ) {
double lvalue = tmp.quickGetValue(i, j);
if( lvalue != 0 )
block.quickSetValue(rowPos, j, lvalue);
}
rowPos++;
}
}
//output current block (by def of diagBlocks, no additional rows)
key.setIndexes(blockRowOut+1, blockCol+1);
writer.append(key, block);
writtenBlocks.add(IDHandler.concatIntIDsToLong(blockRowOut+1, blockCol+1));
//finished block
if( rowPos == maxRow )
{
keyMap.remove(blockRow);
blockRowOut++;
break;
}
}
}
IOUtilFunctions.closeSilently(fkeyMap);
}
else //cols
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, newlen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
HashMap<Integer,HashMap<Long,Long>> keyMap = new HashMap<Integer, HashMap<Long,Long>>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int currentSize = -1;
int blockColOut = 0;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0 )
{
int colPos = 0;
int blockCol = Collections.min(keyMap.keySet());
int maxCol = currentSize;
for( ; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
int blockRow = blockCol; // for diag known to be equivalent
int maxRow = (int)((blockRow*brlen + brlen < rlen) ? brlen : rlen - blockRow*brlen);
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
if( keyMap.containsKey(blockCol) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap<Long,Long> lkeyMap = keyMap.get(blockCol);
long col_offset = blockCol*bclen;
for( int j=0; j<tmp.getNumColumns(); j++ )
if( lkeyMap.containsKey(col_offset+j) ) {
//copy column
for( int i=0; i<tmp.getNumRows(); i++ ){
double lvalue = tmp.quickGetValue(i, j);
if( lvalue != 0 )
block.quickSetValue(i, colPos, lvalue);
}
colPos++;
}
}
//output current block (by def of diagBlocks, no additional cols)
key.setIndexes(blockRow+1, blockColOut+1);
writer.append(key, block);
writtenBlocks.add(IDHandler.concatIntIDsToLong(blockRow+1, blockColOut+1));
//finished block
if( colPos == maxCol )
{
keyMap.remove(blockCol);
blockColOut++;
break;
}
}
}
IOUtilFunctions.closeSilently(fkeyMap);
}
//write remaining empty blocks
MatrixBlock empty = new MatrixBlock(1,1,true);
long rows = _margin.equals("rows") ? newlen : rlen;
long cols = _margin.equals("cols") ? newlen : clen;
int countBlk1 = (int)Math.ceil(rows/(double)brlen)*(int)Math.ceil(cols/(double)bclen);
int countBlk2 = writtenBlocks.size();
for( int i=0; i<(int)Math.ceil(rows/(double)brlen); i++)
for(int j=0; j<(int)Math.ceil(cols/(double)bclen); j++ )
if( !writtenBlocks.contains(IDHandler.concatIntIDsToLong(i+1, j+1)) )
{
int maxRow = (int)((i*brlen + brlen < rows) ? brlen : rows - i*brlen);
int maxCol = (int)((j*bclen + bclen < cols) ? bclen : cols - j*bclen);
empty.reset(maxRow, maxCol);
key.setIndexes(i+1, j+1);
writer.append(key, empty);
countBlk2++;
}
if( countBlk1 != countBlk2 )
throw new DMLRuntimeException("Wrong number of written result blocks: "+countBlk1+" vs "+countBlk2+".");
}
finally {
IOUtilFunctions.closeSilently(writer);
}
}
}
}