package matrix.implementations.binary;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeMap;
import matrix.AbstractDataMatrixInstance;
import matrix.implementations.memory.MemoryDataMatrixInstance;
import org.molgenis.matrix.MatrixException;
public class BinaryDataMatrixInstance_NEW<E> extends BinaryDataMatrixInstance
{
boolean verbose = false;
RandomAccessFile raf;
long[] textDataElementLenghtsCumulative;
public BinaryDataMatrixInstance_NEW(File bin) throws Exception
{
super(bin);
this.raf = new RandomAccessFile(this.getBin(), "r");
if (this.getTextDataElementLengths() != null)
{
this.textDataElementLenghtsCumulative = new long[this.getTextDataElementLengths().length + 1];
textDataElementLenghtsCumulative[0] = 0; // convenient when adding
// to pointer position
long cumulative = 0;
for (int i = 0; i < this.getTextDataElementLengths().length; i++)
{
cumulative = cumulative + this.getTextDataElementLengths()[i];
textDataElementLenghtsCumulative[i + 1] = cumulative;
}
}
}
/**
*
* @param arr
* @param indexInArr
* @param indexInMatrix
* @return
*/
private Object parseFromByteArr(byte[] arr, int indexInArr, int indexInMatrix)
{
// if(verbose){ System.out.println("arr size = " + arr.length +
// ", requested " + indexInArr); }
if (this.getData().getValueType().equals("Decimal"))
{
long longBits = 0;
int index = indexInArr * 8;
for (int j = 0; j < 8; j++)
{
longBits <<= 8;
longBits |= (long) arr[index + j] & 255;
}
double d = Double.longBitsToDouble(longBits);
if (d == Double.MAX_VALUE)
{
return null;
}
else
{
return d;
}
}
else
{
char[] subArr;
if (this.getTextElementLength() != 0)
{
int start = indexInArr * this.getTextElementLength();
int stop = start + this.getTextElementLength();
subArr = new char[this.getTextElementLength()];
int count = 0;
for (int j = start; j < stop; j++)
{
subArr[count] = (char) arr[j];
count++;
}
}
else
{
subArr = new char[this.getTextDataElementLengths()[indexInMatrix]];
int firstArrElementIndex = indexInMatrix - indexInArr;
long arrayStartPos = this.textDataElementLenghtsCumulative[firstArrElementIndex];
int elementStartPos = (int) (this.textDataElementLenghtsCumulative[indexInMatrix] - arrayStartPos);
int elementStopPos = elementStartPos + this.getTextDataElementLengths()[indexInMatrix];
int count = 0;
for (int j = elementStartPos; j < elementStopPos; j++)
{
subArr[count] = (char) arr[j];
count++;
}
}
String fromChars = new String(subArr);
if (fromChars.equals(this.getNullChar()))
{
return "";
}
else
{
return fromChars;
}
}
}
/**
* Could be used for optimizing: skip parsing of ALL results when retrieving
* sparse data
*
* @param startElement
* @param elementAmount
* @param replaceNulls
* @return
* @throws IOException
*/
private byte[] readChunk(int startElement, int elementAmount) throws IOException
{
long startPointer;
int totalBytes;
if (this.getData().getValueType().equals("Decimal"))
{
startPointer = this.getStartOfElementsPointer() + (startElement * 8);
totalBytes = elementAmount * 8;
}
else
{
if (this.getTextElementLength() != 0)
{
startPointer = this.getStartOfElementsPointer() + (startElement * this.getTextElementLength());
totalBytes = elementAmount * this.getTextElementLength();
}
else
{
startPointer = this.getStartOfElementsPointer() + this.textDataElementLenghtsCumulative[startElement];
totalBytes = (int) (this.textDataElementLenghtsCumulative[startElement + elementAmount] - this.textDataElementLenghtsCumulative[startElement]);
}
}
byte[] bytes = new byte[totalBytes];
if (startPointer != raf.getFilePointer())
{
raf.seek(startPointer);
}
raf.read(bytes);
return bytes;
}
@Override
/**
* Get one row. Still fast if the rows are sequentially retrieved. (index increment of 1)
*/
public Object[] getRow(int rowIndex) throws Exception
{
int[] cols = new int[this.getNumberOfCols()];
int[] rows = new int[]
{ rowIndex };
for (int c = 0; c < this.getNumberOfCols(); c++)
{
cols[c] = c;
}
// submatrix should have only 1 column: get this
// (from in memory implementation) and return
return getSubMatrix(rows, cols).getRow(0);
// return readChunk((rowIndex * this.getNumberOfCols()),
// this.getNumberOfCols(), true);
}
@Override
/**
* Get one column.
* Special case of getSubMatrix(int[] rowIndices, int[] colIndices) where we want all rows and only 1 column.
* TODO: we know the getSubMatrix result has only one column, but getCol still copies it out.. relatively expensive
*/
public Object[] getCol(int colIndex) throws Exception
{
int[] cols = new int[]
{ colIndex };
int[] rows = new int[this.getNumberOfRows()];
for (int r = 0; r < this.getNumberOfRows(); r++)
{
rows[r] = r;
}
// submatrix should have only 1 column: get this
// (from in memory implementation) and return
return getSubMatrix(rows, cols).getCol(0);
}
@Override
/**
* Get one element. Still fast if the elements are sequentially retrieved. (index increment of 1, by row)
*/
public Object getElement(int rowIndex, int colIndex) throws Exception
{
int[] cols = new int[]
{ colIndex };
int[] rows = new int[]
{ rowIndex };
return getSubMatrix(rows, cols).getElement(0, 0);
}
@Override
public Object[][] getElements() throws MatrixException
{
int[] rows = new int[this.getNumberOfRows()];
int[] cols = new int[this.getNumberOfCols()];
for (int r = 0; r < this.getNumberOfRows(); r++)
{
rows[r] = r;
}
for (int c = 0; c < this.getNumberOfCols(); c++)
{
cols[c] = c;
}
// should be a memorymatrix, meaning the getElements just returns
// current memory location
return this.getSubMatrix(rows, cols).getElements();
}
/**
* Get a submatrix by starts plus offsets. Special case of
* getSubMatrix(int[] rowIndices, int[] colIndices) where all indices are
* sequential.
*/
@Override
public AbstractDataMatrixInstance<Object> getSubMatrixByOffset(int row, int nRows, int col, int nCols)
throws Exception
{
int[] rows = new int[nRows];
int[] cols = new int[nCols];
int counter = 0;
for (int r = row; r < row + nRows; r++)
{
rows[counter] = r;
counter++;
}
counter = 0;
for (int c = col; c < col + nCols; c++)
{
cols[counter] = c;
counter++;
}
return this.getSubMatrix(rows, cols);
}
/**
* Get a submatrix by
*/
@Override
public AbstractDataMatrixInstance<Object> getSubMatrix(int[] rowIndices, int[] colIndices) throws MatrixException
{
// result
Object[][] result = new Object[rowIndices.length][colIndices.length];
// keys: the indices to retrieve, values: the original location of this
// index in the provided array
// we sort the keys from low to high
TreeMap<Integer, Integer> rowIndexPositions = new TreeMap<Integer, Integer>(new sortInt());
TreeMap<Integer, Integer> colIndexPositions = new TreeMap<Integer, Integer>(new sortInt());
for (int i = 0; i < rowIndices.length; i++)
{
rowIndexPositions.put(rowIndices[i], i);
}
for (int i = 0; i < colIndices.length; i++)
{
colIndexPositions.put(colIndices[i], i);
}
int lowestRowIndex = rowIndexPositions.firstKey();
int highestRowIndex = rowIndexPositions.lastKey();
int lowestColIndex = colIndexPositions.firstKey();
int highestColIndex = colIndexPositions.lastKey();
int firstElement = (this.getNumberOfCols() * lowestRowIndex) + lowestColIndex; // inclusive
int lastElement = (this.getNumberOfCols() * highestRowIndex) + highestColIndex + 1; // exclusive
int totalElements = lastElement - firstElement;
if (verbose)
{
System.out.println("row indices + return position:");
}
for (Integer key : rowIndexPositions.keySet())
{
if (verbose)
{
System.out.print(key + "->" + rowIndexPositions.get(key) + " ");
}
}
if (verbose)
{
System.out.println();
}
if (verbose)
{
System.out.println("col indices + return position:");
}
for (Integer key : colIndexPositions.keySet())
{
if (verbose)
{
System.out.print(key + "->" + colIndexPositions.get(key) + " ");
}
}
if (verbose)
{
System.out.println();
}
if (verbose)
{
System.out.println("lowestRowIndex: " + lowestRowIndex);
}
if (verbose)
{
System.out.println("highestRowIndex: " + highestRowIndex);
}
if (verbose)
{
System.out.println("lowestColIndex: " + lowestColIndex);
}
if (verbose)
{
System.out.println("highestColIndex: " + highestColIndex);
}
if (verbose)
{
System.out.println("nr of rows: " + this.getNumberOfRows());
}
if (verbose)
{
System.out.println("nr of columns: " + this.getNumberOfCols());
}
if (verbose)
{
System.out.println("in 2D: firstElement (incluse): " + firstElement);
}
if (verbose)
{
System.out.println("in 2D: lastElement (exclusive): " + lastElement);
}
if (verbose)
{
System.out
.println("total elements we're going to read over (==last-first, but we might skip 'empty' chunks in the middle): "
+ totalElements);
}
long memAlloc = (Runtime.getRuntime().freeMemory() / 4); // 25% of
// available
// memory
// for
// reading
if (verbose)
{
System.out.println("bytes of memory reserved for reading chunks: " + memAlloc);
}
int elementLength;
if (this.getData().getValueType().equals("Decimal"))
{
elementLength = 8;
}
else
{
if (this.getTextElementLength() != 0)
{
elementLength = this.getTextElementLength();
}
else
{
elementLength = -1; // we don't know yet for variable text
// element size
}
}
int maxElementsToRead = (int) (memAlloc / elementLength);
if (verbose)
{
System.out.println("we can hold " + maxElementsToRead + " elements in memory");
}
if (maxElementsToRead > totalElements)
{
if (verbose)
{
System.out.println("OPTIMIZATION: maxElementsToRead (" + maxElementsToRead + ") > totalElements ("
+ totalElements + "), adjusting maxElementsToRead to " + totalElements);
}
maxElementsToRead = totalElements;
}
boolean done = false;
int iterationCounter = 0;
int currentStartElement = firstElement;
while (!done)
{
if (verbose)
{
System.out
.println("iteration nr " + iterationCounter + ", currentStartElement: " + currentStartElement);
}
iterationCounter++;
// find out if we're going to get elements we want in the next read
// action
// if not: seek the RAF and adjust start element!
boolean skipChunkAndSeek = true;
if (elementLength == -1)
{
if (verbose)
{
System.out.println("VARTEXT: finding out maxElementsToRead range!");
}
int cumuLength = 0;
int iter = 0;
for (int i = currentStartElement; i < this.getTextDataElementLengths().length; i++)
{
if (cumuLength > memAlloc)
{
if (verbose)
{
System.out.println("VARTEXT: cumuLength > memAlloc at cumuLength = " + cumuLength
+ ", iter/maxElementsToRead = " + iter + " ; BREAKING");
}
break;
}
else if (iter > totalElements)
{
if (verbose)
{
System.out.println("VARTEXT: iter > totalElements at cumuLength = " + cumuLength
+ ", iter/maxElementsToRead = " + iter + " ; BREAKING");
}
break;
}
cumuLength += this.getTextDataElementLengths()[i];
iter++;
}
maxElementsToRead = iter;
if (verbose)
{
System.out.println("VARTEXT: maxElementsToRead = " + maxElementsToRead);
}
}
for (int elementIndex = currentStartElement; elementIndex < currentStartElement + maxElementsToRead; elementIndex++)
{
int checkCol = elementIndex % this.getNumberOfCols();
int checkRow = (currentStartElement - checkCol) / this.getNumberOfCols();
// e.g. if we're at element 12 in a 5-col matrix, we check if
// colIndex 2 if part of the result, and so on
// if there is at least one, we'll get the chunk
if (colIndexPositions.containsKey(checkCol) && rowIndexPositions.containsKey(checkRow))
{
if (verbose)
{
System.out.println("the coming chunk has data we want (col " + checkCol + ", row " + checkRow
+ ")");
}
skipChunkAndSeek = false;
break;
}
}
try
{
if (skipChunkAndSeek)
{
if (verbose)
{
System.out.println("NO DATA IN NEXT CHUNK - SKIPPING AND SEEKING");
}
int currentColPos = currentStartElement % this.getNumberOfCols();
int currentRowPos = (currentStartElement - currentColPos) / this.getNumberOfCols();
int newColPos = -1;
int newRowPos = -1;
// special case: we need to get a column from the current
// row still
if (currentColPos <= colIndexPositions.lastKey() && rowIndexPositions.containsKey(currentRowPos))
{
// skip columns (or the same column using ceilingKey()
// if currentColPos == colIndexPositions.lastKey()
newColPos = colIndexPositions.ceilingKey(currentColPos);
newRowPos = currentRowPos;
}
else
{
// skip rows (using higherKey()) and start at first
// wanted column in that row
newColPos = colIndexPositions.firstKey();
newRowPos = rowIndexPositions.higherKey(currentRowPos);
}
currentStartElement = (this.getNumberOfCols() * newRowPos) + newColPos;
int newPointer = this.getStartOfElementsPointer() + (currentStartElement * elementLength);
if (elementLength == -1)
{
newPointer = (int) (this.getStartOfElementsPointer() + this.textDataElementLenghtsCumulative[currentStartElement]);
if (verbose)
{
System.out.println("VARTEXT skip pointer to " + newPointer);
}
}
if (verbose)
{
System.out.println("new currentStartElement = " + currentStartElement + " (at row " + newRowPos
+ ", col " + newColPos + ") seeking to " + newPointer);
}
raf.seek(newPointer);
}
else
{
if (verbose)
{
System.out.println("reading from " + currentStartElement + " to "
+ (currentStartElement + maxElementsToRead));
}
// read the chunk
byte[] rawElements = readChunk(currentStartElement, maxElementsToRead);
for (int i = 0; i < maxElementsToRead; i++)
{
int inChunkColPos = (currentStartElement + i) % this.getNumberOfCols();
int inChunkRowPos = (currentStartElement + i - inChunkColPos) / this.getNumberOfCols();
if (colIndexPositions.containsKey(inChunkColPos)
&& rowIndexPositions.containsKey(inChunkRowPos))
{
// map to the correct position in the output
// (usually the same, but could be different!)
result[rowIndexPositions.get(inChunkRowPos)][colIndexPositions.get(inChunkColPos)] = parseFromByteArr(
rawElements, i, currentStartElement + i);
}
}
currentStartElement = currentStartElement + maxElementsToRead;
}
}
catch (IOException e)
{
throw new MatrixException(e);
}
if (currentStartElement >= lastElement)
{
if (verbose)
{
System.out.println("quitting: " + currentStartElement + " >= " + lastElement);
}
done = true;
}
else
{
if (verbose)
{
System.out.println("not quitting: " + currentStartElement + " < " + lastElement);
}
}
}
List<String> rowNames = new ArrayList<String>();
List<String> colNames = new ArrayList<String>();
for (int rowIndex : rowIndices)
{
rowNames.add(this.getRowNames().get(rowIndex).toString());
}
for (int colIndex : colIndices)
{
colNames.add(this.getColNames().get(colIndex).toString());
}
AbstractDataMatrixInstance dm = new MemoryDataMatrixInstance(rowNames, colNames, result, this.getData());
return dm;
}
}
class sortInt implements Comparator<Integer>
{
public int compare(Integer a, Integer b)
{
if (a.intValue() < b.intValue())
{
return -1;
}
else if (a.intValue() == b.intValue())
{
return 0;
}
else
{
return 1;
}
}
}