package matrix.implementations.binary;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import matrix.AbstractDataMatrixInstance;
import matrix.implementations.memory.MemoryDataMatrixInstance;
import org.apache.log4j.Logger;
import org.molgenis.data.Data;
import org.molgenis.matrix.MatrixException;
public class BinaryDataMatrixInstance extends AbstractDataMatrixInstance<Object>
{
// Logger named after the runtime class' simple name.
Logger logger = Logger.getLogger(getClass().getSimpleName());
// Fixed byte width of every text element; 0 means the elements have
// individual lengths, stored in textDataElementLengths.
private int textElementLength;
// Byte offset in the file at which the first matrix element starts.
private int startOfElementsPointer;
// Byte offset just past the last matrix element, as computed by the constructor.
// FIXME: is an int big enough? It limits the addressable file size to
// roughly 2 GB.
private int endOfElementsPointer;
// Per-element byte lengths in row-major order; only set when
// textElementLength is 0.
private byte[] textDataElementLengths;
// The binary file all reads go to.
private File bin;
// The first byte of the file, decoded as a string.
private String nullChar;
// Matches a string made up solely of nullChar; such text elements are
// returned as the empty string.
private Pattern nullCharPattern;
/**
 * Opens a binary matrix file and reads its header so that elements can
 * later be fetched by random access.
 * <p>
 * The header is read in this order: one null-marker byte; four
 * length-prefixed strings (name, investigation name, feature type, target
 * type); one boolean (true = "Decimal", false = "Text"); the number of
 * columns and rows as ints; one length byte per column name and per row
 * name; the column names and row names themselves. For "Text" matrices one
 * more byte gives the fixed element width; if that width is 0, one length
 * byte per element follows.
 * <p>
 * The input stream is only needed for the header and is closed before the
 * constructor returns, also when reading fails.
 *
 * @param bin
 *            the binary matrix file
 * @throws Exception
 *             if the file cannot be opened or ends before the header is
 *             complete
 */
public BinaryDataMatrixInstance(File bin) throws Exception
{
    this.setBin(bin);
    DataInputStream dis = new DataInputStream(new FileInputStream(bin));
    try
    {
        Data dataDescription = new Data();
        // Running count of header bytes consumed. It is built from the byte
        // counts that were actually read, not from decoded string lengths,
        // so it stays correct when a name decodes to fewer chars than bytes.
        int startOfElements = 0;

        // first 'Data' object metadata contained in the bin file
        this.setNullChar(readNextChars(dis, 1));
        startOfElements += 1;

        int nameLength = dis.readUnsignedByte();
        dataDescription.setName(readNextChars(dis, nameLength));
        int investigationNameLength = dis.readUnsignedByte();
        dataDescription.setInvestigation_Name(readNextChars(dis, investigationNameLength));
        int featureTypeLength = dis.readUnsignedByte();
        dataDescription.setFeatureType(readNextChars(dis, featureTypeLength));
        int targetTypeLength = dis.readUnsignedByte();
        dataDescription.setTargetType(readNextChars(dis, targetTypeLength));
        // each string costs its length byte plus its content
        startOfElements += 1 + nameLength;
        startOfElements += 1 + investigationNameLength;
        startOfElements += 1 + featureTypeLength;
        startOfElements += 1 + targetTypeLength;

        dataDescription.setValueType(dis.readBoolean() ? "Decimal" : "Text");
        this.setNumberOfCols(dis.readInt());
        this.setNumberOfRows(dis.readInt());
        // 1 byte value type + 4 bytes column count + 4 bytes row count
        startOfElements += 1 + 4 + 4;
        this.setData(dataDescription);

        // now the information contained within the actual matrix file
        int[] colNameLengths = new int[this.getNumberOfCols()];
        int[] rowNameLengths = new int[this.getNumberOfRows()];
        for (int i = 0; i < colNameLengths.length; i++)
        {
            colNameLengths[i] = dis.readUnsignedByte();
        }
        for (int i = 0; i < rowNameLengths.length; i++)
        {
            rowNameLengths[i] = dis.readUnsignedByte();
        }
        startOfElements += colNameLengths.length;
        startOfElements += rowNameLengths.length;

        ArrayList<String> colNames = new ArrayList<String>(colNameLengths.length);
        ArrayList<String> rowNames = new ArrayList<String>(rowNameLengths.length);
        for (int i = 0; i < colNameLengths.length; i++)
        {
            colNames.add(i, readNextChars(dis, colNameLengths[i]));
            startOfElements += colNameLengths[i];
        }
        for (int i = 0; i < rowNameLengths.length; i++)
        {
            rowNames.add(i, readNextChars(dis, rowNameLengths[i]));
            startOfElements += rowNameLengths[i];
        }
        this.setColNames(colNames);
        this.setRowNames(rowNames);

        boolean isText = dataDescription.getValueType().equals("Text");
        if (isText)
        {
            this.setTextElementLength(dis.readUnsignedByte());
            logger.debug("this.getTextElementLength() = " + this.getTextElementLength());
            startOfElements += 1;
            if (this.getTextElementLength() == 0)
            {
                byte[] elementLengths = new byte[this.getNumberOfCols() * this.getNumberOfRows()];
                // readFully: a plain read() may stop early and leave the
                // tail of the table zeroed
                dis.readFully(elementLengths);
                startOfElements += elementLengths.length;
                this.setTextDataElementLengths(elementLengths);
            }
        }

        // now prepare for random access querying
        this.setStartOfElementsPointer(startOfElements);
        // quote the marker so that a regex metacharacter is matched literally
        this.setNullCharPattern(Pattern.compile("(?:" + Pattern.quote(this.getNullChar()) + ")+"));

        int elementCount = this.getNumberOfCols() * this.getNumberOfRows();
        int endOfElements;
        if (!isText)
        {
            // decimals are stored as 8-byte doubles
            endOfElements = startOfElements + (elementCount * 8);
        }
        else if (this.getTextElementLength() != 0)
        {
            endOfElements = startOfElements + (elementCount * this.getTextElementLength());
        }
        else
        {
            endOfElements = this.getStartOfElementsPointer();
            for (byte length : this.getTextDataElementLengths())
            {
                // every other length in this format is read as an unsigned
                // byte; mask so lengths above 127 do not count as negative
                endOfElements += length & 0xFF;
            }
        }
        this.setEndOfElementsPointer(endOfElements);
    }
    finally
    {
        dis.close();
    }
}
/**
 * Reads the next 8 bytes at the current file position as a big-endian
 * double.
 *
 * @param raf
 *            the file, positioned at the start of the value
 * @return the value, or null when it equals Double.MAX_VALUE (the stored
 *         marker for a missing value)
 * @throws IOException
 *             if fewer than 8 bytes are left or reading fails
 */
private Double readNextDoubleFromRAF(RandomAccessFile raf) throws IOException
{
    byte[] raw = new byte[8];
    // readFully instead of read: read() may return fewer bytes than asked
    // for, which would silently decode a wrong number
    raf.readFully(raw);
    double value = byteArrayToDouble(raw);
    if (value == Double.MAX_VALUE)
    {
        return null;
    }
    return value;
}
/**
 * Reads the next {@code nr} doubles (8 bytes each) from the current file
 * position in a single read.
 *
 * @param raf
 *            the file, positioned at the first value
 * @param nr
 *            how many doubles to read
 * @return the decoded values as produced by byteArrayToDoubles
 * @throws IOException
 *             if fewer than {@code nr * 8} bytes are left or reading fails
 */
private Double[] readNextDoublesFromRAF(RandomAccessFile raf, int nr) throws IOException
{
    byte[] raw = new byte[nr * 8];
    // readFully instead of read: a short read would leave trailing zero
    // bytes that decode as 0.0
    raf.readFully(raw);
    return byteArrayToDoubles(raw);
}
/**
 * Decodes consecutive 8-byte groups, most significant byte first, into
 * doubles.
 *
 * @param arr
 *            the raw bytes
 * @return one entry per 8-byte group; an entry is null when the decoded
 *         value equals Double.MAX_VALUE
 */
private Double[] byteArrayToDoubles(byte[] arr)
{
    Double[] decoded = new Double[arr.length / 8];
    for (int offset = 0; offset < arr.length; offset += 8)
    {
        long bits = 0L;
        for (int k = offset; k < offset + 8; k++)
        {
            // shift the accumulated bytes up and append the next one unsigned
            bits = (bits << 8) | (arr[k] & 0xFFL);
        }
        double value = Double.longBitsToDouble(bits);
        decoded[offset / 8] = (value == Double.MAX_VALUE) ? null : Double.valueOf(value);
    }
    return decoded;
}
/**
 * Decodes a byte array, most significant byte first, into a double.
 *
 * @param arr
 *            the raw bytes of one value
 * @return the double whose bit pattern is formed by the bytes
 */
private double byteArrayToDouble(byte[] arr)
{
    long bits = 0L;
    for (byte b : arr)
    {
        // shift the accumulated bytes up and append the next one unsigned
        bits = (bits << 8) | (b & 0xFFL);
    }
    return Double.longBitsToDouble(bits);
}
/**
 * Reads the next {@code stringLength} bytes at the current file position
 * and decodes them with the platform default charset.
 *
 * @param raf
 *            the file, positioned at the start of the text element
 * @param stringLength
 *            number of bytes to read
 * @return the decoded text, or the empty string when the text matches the
 *         null-character pattern
 * @throws IOException
 *             if fewer than {@code stringLength} bytes are left or reading
 *             fails
 */
private String readNextCharsFromRAF(RandomAccessFile raf, int stringLength) throws IOException
{
    byte[] raw = new byte[stringLength];
    // readFully instead of read: a short read would pad the text with NUL
    // characters
    raf.readFully(raw);
    String result = new String(raw);
    if (this.getNullCharPattern().matcher(result).matches())
    {
        result = "";
    }
    return result;
}
/**
 * Reads the next {@code stringLength} bytes from the stream and decodes
 * them with the platform default charset.
 *
 * @param dis
 *            the stream to read from
 * @param stringLength
 *            number of bytes to read
 * @return the decoded text
 * @throws IOException
 *             if the stream ends before {@code stringLength} bytes were
 *             read or reading fails
 */
private String readNextChars(DataInputStream dis, int stringLength) throws IOException
{
    byte[] raw = new byte[stringLength];
    // readFully instead of read: read() may deliver fewer bytes, which
    // would shift everything parsed after this string
    dis.readFully(raw);
    return new String(raw);
}
/**
 * Reads one column of the matrix from the binary file.
 *
 * @param colindex
 *            zero-based column index
 * @return one value per row: Double (or null) for "Decimal" data, String
 *         otherwise
 * @throws Exception
 *             if the file cannot be opened or read
 */
@Override
public Object[] getCol(int colindex) throws Exception
{
    Object[] result = new Object[this.getNumberOfRows()];
    final int numberOfCols = this.getNumberOfCols();
    RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
    try
    {
        if (this.getData().getValueType().equals("Decimal"))
        {
            for (int i = 0; i < result.length; i++)
            {
                // long arithmetic: the byte offset can exceed the int range
                long elementIndex = colindex + (long) i * numberOfCols;
                raf.seek(this.startOfElementsPointer + elementIndex * 8L);
                result[i] = readNextDoubleFromRAF(raf);
            }
        }
        else if (this.getTextElementLength() != 0)
        {
            final int width = this.getTextElementLength();
            for (int i = 0; i < result.length; i++)
            {
                long elementIndex = colindex + (long) i * numberOfCols;
                raf.seek(this.startOfElementsPointer + elementIndex * width);
                result[i] = readNextCharsFromRAF(raf, width);
            }
        }
        else
        {
            // variable-width text: keep a running byte position and only add
            // the lengths of the elements skipped since the previous row
            byte[] lengths = this.getTextDataElementLengths();
            long bytePos = 0;
            int lastIndex = 0;
            for (int i = 0; i < result.length; i++)
            {
                int nextIndex = colindex + (i * numberOfCols);
                for (int j = lastIndex; j < nextIndex; j++)
                {
                    // lengths are unsigned bytes; mask to avoid negatives
                    bytePos += lengths[j] & 0xFF;
                }
                lastIndex = nextIndex + 1;
                raf.seek(this.startOfElementsPointer + bytePos);
                int elementLength = lengths[nextIndex] & 0xFF;
                result[i] = readNextCharsFromRAF(raf, elementLength);
                bytePos += elementLength;
            }
        }
    }
    finally
    {
        // release the file handle even when a read fails
        raf.close();
    }
    return result;
}
/**
 * Reads one row of the matrix from the binary file.
 *
 * @param rowindex
 *            zero-based row index
 * @return one value per column: Double (or null) for "Decimal" data,
 *         String otherwise
 * @throws Exception
 *             if the file cannot be opened or read
 */
@Override
public Object[] getRow(int rowindex) throws Exception
{
    Object[] result = new Object[this.getNumberOfCols()];
    RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
    try
    {
        // index of the row's first element; long so that the byte offsets
        // derived from it cannot overflow
        long firstIndex = (long) rowindex * this.getNumberOfCols();
        if (this.getData().getValueType().equals("Decimal"))
        {
            raf.seek(this.startOfElementsPointer + firstIndex * 8L);
            for (int i = 0; i < result.length; i++)
            {
                result[i] = readNextDoubleFromRAF(raf);
            }
        }
        else if (this.getTextElementLength() != 0)
        {
            final int width = this.getTextElementLength();
            raf.seek(this.startOfElementsPointer + firstIndex * width);
            for (int i = 0; i < result.length; i++)
            {
                result[i] = readNextCharsFromRAF(raf, width);
            }
        }
        else
        {
            byte[] lengths = this.getTextDataElementLengths();
            int startIndex = rowindex * this.getNumberOfCols();
            long byteOffset = 0;
            for (int i = 0; i < startIndex; i++)
            {
                // lengths are unsigned bytes; mask to avoid negatives
                byteOffset += lengths[i] & 0xFF;
            }
            raf.seek(this.startOfElementsPointer + byteOffset);
            // the row's elements are stored back to back
            for (int i = 0; i < result.length; i++)
            {
                result[i] = readNextCharsFromRAF(raf, lengths[startIndex + i] & 0xFF);
            }
        }
    }
    finally
    {
        // release the file handle even when a read fails
        raf.close();
    }
    return result;
}
/**
 * Reads a single matrix element from the binary file.
 *
 * @param rowindex
 *            zero-based row index
 * @param colindex
 *            zero-based column index
 * @return a Double (or null) for "Decimal" data, a String otherwise
 * @throws Exception
 *             if the file cannot be opened or read
 */
@Override
public Object getElement(int rowindex, int colindex) throws Exception
{
    Object result;
    RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
    try
    {
        if (this.getData().getValueType().equals("Decimal"))
        {
            // long arithmetic: the byte offset can exceed the int range
            long elementIndex = (long) rowindex * this.getNumberOfCols() + colindex;
            raf.seek(this.startOfElementsPointer + elementIndex * 8L);
            result = readNextDoubleFromRAF(raf);
        }
        else if (this.getTextElementLength() != 0)
        {
            final int width = this.getTextElementLength();
            long elementIndex = (long) rowindex * this.getNumberOfCols() + colindex;
            raf.seek(this.startOfElementsPointer + elementIndex * width);
            result = readNextCharsFromRAF(raf, width);
        }
        else
        {
            byte[] lengths = this.getTextDataElementLengths();
            int startIndex = (rowindex * this.getNumberOfCols()) + colindex;
            long byteOffset = 0;
            for (int i = 0; i < startIndex; i++)
            {
                // lengths are unsigned bytes; mask to avoid negatives
                byteOffset += lengths[i] & 0xFF;
            }
            raf.seek(this.startOfElementsPointer + byteOffset);
            result = readNextCharsFromRAF(raf, lengths[startIndex] & 0xFF);
        }
    }
    finally
    {
        // release the file handle even when a read fails
        raf.close();
    }
    return result;
}
/**
 * Builds an in-memory matrix from the elements at the crossings of the
 * given rows and columns, in the order the indices are given.
 * <p>
 * When both index arrays are non-empty runs of consecutive ascending
 * indices, the request is delegated to getSubMatrixByOffset. Otherwise
 * every element is fetched with its own seek.
 *
 * @param rowIndices
 *            zero-based row indices
 * @param colIndices
 *            zero-based column indices
 * @return a MemoryDataMatrixInstance holding the selected elements
 * @throws MatrixException
 *             wrapping any failure while reading
 */
@Override
public AbstractDataMatrixInstance getSubMatrix(int[] rowIndices, int[] colIndices) throws MatrixException
{
    try
    {
        // the optimized way: find out if the indices form a single block;
        // if so, use offset retrieval instead
        if (isAscendingRun(rowIndices) && isAscendingRun(colIndices))
        {
            return getSubMatrixByOffset(rowIndices[0], rowIndices.length, colIndices[0], colIndices.length);
        }

        // the usual way: get single elements at the crossing sections of
        // indices. very inefficient but always works
        Object[][] elements = new Object[rowIndices.length][colIndices.length];
        final int numberOfCols = this.getNumberOfCols();
        RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
        try
        {
            boolean isDecimal = this.getData().getValueType().equals("Decimal");
            int width = this.getTextElementLength();
            for (int r = 0; r < rowIndices.length; r++)
            {
                for (int c = 0; c < colIndices.length; c++)
                {
                    int index = (rowIndices[r] * numberOfCols) + colIndices[c];
                    if (isDecimal)
                    {
                        // long arithmetic: the byte offset can exceed the int range
                        raf.seek(this.startOfElementsPointer + index * 8L);
                        elements[r][c] = readNextDoubleFromRAF(raf);
                    }
                    else if (width != 0)
                    {
                        raf.seek(this.startOfElementsPointer + (long) index * width);
                        elements[r][c] = readNextCharsFromRAF(raf, width);
                    }
                    else
                    {
                        byte[] lengths = this.getTextDataElementLengths();
                        long byteOffset = 0;
                        for (int i = 0; i < index; i++)
                        {
                            // lengths are unsigned bytes; mask to avoid negatives
                            byteOffset += lengths[i] & 0xFF;
                        }
                        raf.seek(this.startOfElementsPointer + byteOffset);
                        elements[r][c] = readNextCharsFromRAF(raf, lengths[index] & 0xFF);
                    }
                }
            }
        }
        finally
        {
            // release the file handle even when a read fails
            raf.close();
        }

        List<String> rowNames = new ArrayList<String>(rowIndices.length);
        List<String> colNames = new ArrayList<String>(colIndices.length);
        for (int rowIndex : rowIndices)
        {
            rowNames.add(this.getRowNames().get(rowIndex).toString());
        }
        for (int colIndex : colIndices)
        {
            colNames.add(this.getColNames().get(colIndex).toString());
        }
        return new MemoryDataMatrixInstance(rowNames, colNames, elements, this.getData());
    }
    catch (Exception e)
    {
        throw new MatrixException(e);
    }
}

/**
 * Tells whether the indices are a non-empty run in which each index is
 * exactly one higher than its predecessor.
 */
private static boolean isAscendingRun(int[] indices)
{
    if (indices.length == 0)
    {
        return false;
    }
    for (int i = 0; i < indices.length - 1; i++)
    {
        if (indices[i] + 1 != indices[i + 1])
        {
            return false;
        }
    }
    return true;
}
/**
 * Builds an in-memory matrix from a rectangular block of this matrix.
 *
 * @param row
 *            zero-based index of the first row
 * @param nrows
 *            number of rows to take
 * @param col
 *            zero-based index of the first column
 * @param ncols
 *            number of columns to take
 * @return a MemoryDataMatrixInstance holding the block
 * @throws Exception
 *             if the file cannot be opened or read
 */
@Override
public AbstractDataMatrixInstance<Object> getSubMatrixByOffset(int row, int nrows, int col, int ncols)
        throws Exception
{
    Object[][] elements = new Object[nrows][ncols];
    final int totalCols = this.getNumberOfCols();
    // fill elements
    RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
    try
    {
        if (this.getData().getValueType().equals("Decimal"))
        {
            if (ncols == totalCols)
            {
                // full rows are stored back to back: no seeking between
                // rows, fetch everything with one read
                raf.seek(this.startOfElementsPointer + (long) row * totalCols * 8L);
                Double[] values = readNextDoublesFromRAF(raf, nrows * ncols);
                for (int r = 0; r < nrows; r++)
                {
                    System.arraycopy(values, r * ncols, elements[r], 0, ncols);
                }
            }
            else
            {
                for (int r = 0; r < nrows; r++)
                {
                    // long arithmetic: the byte offset can exceed the int range
                    long startIndex = (long) (row + r) * totalCols + col;
                    raf.seek(this.startOfElementsPointer + startIndex * 8L);
                    for (int c = 0; c < ncols; c++)
                    {
                        elements[r][c] = readNextDoubleFromRAF(raf);
                    }
                }
            }
        }
        else if (this.getTextElementLength() != 0)
        {
            final int width = this.getTextElementLength();
            for (int r = 0; r < nrows; r++)
            {
                long startIndex = (long) (row + r) * totalCols + col;
                raf.seek(this.startOfElementsPointer + startIndex * width);
                for (int c = 0; c < ncols; c++)
                {
                    elements[r][c] = readNextCharsFromRAF(raf, width);
                }
            }
        }
        else
        {
            // variable-width text: keep a running byte offset and only add
            // the lengths of the elements skipped between two row segments
            byte[] lengths = this.getTextDataElementLengths();
            long byteOffset = 0;
            int lastIndex = 0;
            for (int r = 0; r < nrows; r++)
            {
                int nextIndex = ((row + r) * totalCols) + col;
                for (int i = lastIndex; i < nextIndex; i++)
                {
                    // lengths are unsigned bytes; mask to avoid negatives
                    byteOffset += lengths[i] & 0xFF;
                }
                lastIndex = nextIndex + ncols;
                raf.seek(this.startOfElementsPointer + byteOffset);
                for (int c = 0; c < ncols; c++)
                {
                    int elementLength = lengths[nextIndex + c] & 0xFF;
                    elements[r][c] = readNextCharsFromRAF(raf, elementLength);
                    byteOffset += elementLength;
                }
            }
        }
    }
    finally
    {
        // release the file handle even when a read fails
        raf.close();
    }
    // end fill elements
    List<String> rowNames = getRowNames().subList(row, row + nrows);
    List<String> colNames = getColNames().subList(col, col + ncols);
    AbstractDataMatrixInstance<Object> result = new MemoryDataMatrixInstance(rowNames, colNames, elements,
            this.getData());
    return result;
}
// Returns the per-element byte lengths of variable-width text data (the
// internal array itself, not a copy).
byte[] getTextDataElementLengths()
{
return textDataElementLengths;
}
// Stores the per-element byte lengths of variable-width text data.
void setTextDataElementLengths(byte[] textDataElementLengths)
{
this.textDataElementLengths = textDataElementLengths;
}
// Returns the fixed byte width of text elements; 0 means variable width.
int getTextElementLength()
{
return textElementLength;
}
// Sets the fixed byte width of text elements.
void setTextElementLength(int textElementLength)
{
this.textElementLength = textElementLength;
}
// redundant with getAsFile(), but used internally
File getBin()
{
return bin;
}
// Sets the binary file that is read from.
void setBin(File bin)
{
this.bin = bin;
}
// Returns the null marker read from the start of the file.
String getNullChar()
{
return nullChar;
}
// Sets the null marker.
void setNullChar(String nullChar)
{
this.nullChar = nullChar;
}
// Returns the pattern used to recognise text elements that consist only
// of the null marker.
Pattern getNullCharPattern()
{
return nullCharPattern;
}
// Sets the pattern used to recognise null-marker-only text elements.
void setNullCharPattern(Pattern nullCharPattern)
{
this.nullCharPattern = nullCharPattern;
}
// Returns the byte offset of the first matrix element in the file.
int getStartOfElementsPointer()
{
return startOfElementsPointer;
}
// Sets the byte offset of the first matrix element in the file.
void setStartOfElementsPointer(int startOfElementsPointer)
{
this.startOfElementsPointer = startOfElementsPointer;
}
// Returns the byte offset just past the last matrix element.
int getEndOfElementsPointer()
{
return endOfElementsPointer;
}
// Sets the byte offset just past the last matrix element.
void setEndOfElementsPointer(int endOfElementsPointer)
{
this.endOfElementsPointer = endOfElementsPointer;
}
/**
 * Reads every element of the matrix into a two-dimensional array indexed
 * as {@code [row][column]}.
 * <p>
 * TODO: Make a generic getElements function that is used by this (or is
 * this) and by 'Matrix get(int,int,int,int)', because the code is pretty
 * much duplicated right now. (done for safety)
 *
 * @return all elements: Double (or null) for "Decimal" data, String
 *         otherwise
 * @throws MatrixException
 *             wrapping any failure while reading
 */
@Override
public Object[][] getElements() throws MatrixException
{
    try
    {
        final int nrows = this.getNumberOfRows();
        final int ncols = this.getNumberOfCols();
        Object[][] elements = new Object[nrows][ncols];
        // fill elements
        RandomAccessFile raf = new RandomAccessFile(this.getBin(), "r");
        try
        {
            if (this.getData().getValueType().equals("Decimal"))
            {
                for (int r = 0; r < nrows; r++)
                {
                    // long arithmetic: the byte offset can exceed the int range
                    raf.seek(this.startOfElementsPointer + (long) r * ncols * 8L);
                    for (int c = 0; c < ncols; c++)
                    {
                        elements[r][c] = readNextDoubleFromRAF(raf);
                    }
                }
            }
            else if (this.getTextElementLength() != 0)
            {
                final int width = this.getTextElementLength();
                for (int r = 0; r < nrows; r++)
                {
                    raf.seek(this.startOfElementsPointer + (long) r * ncols * width);
                    for (int c = 0; c < ncols; c++)
                    {
                        elements[r][c] = readNextCharsFromRAF(raf, width);
                    }
                }
            }
            else
            {
                // variable-width text: all elements are stored back to
                // back, so a running byte offset locates each row
                byte[] lengths = this.getTextDataElementLengths();
                long byteOffset = 0;
                for (int r = 0; r < nrows; r++)
                {
                    raf.seek(this.startOfElementsPointer + byteOffset);
                    for (int c = 0; c < ncols; c++)
                    {
                        // lengths are unsigned bytes; mask to avoid negatives
                        int elementLength = lengths[(r * ncols) + c] & 0xFF;
                        elements[r][c] = readNextCharsFromRAF(raf, elementLength);
                        byteOffset += elementLength;
                    }
                }
            }
        }
        finally
        {
            // release the file handle even when a read fails
            raf.close();
        }
        return elements;
    }
    catch (Exception e)
    {
        throw new MatrixException(e);
    }
}
/**
 * Returns the binary file this matrix reads from.
 *
 * @return the file passed to the constructor or set through setBin
 */
@Override
public File getAsFile() throws Exception
{
    return this.bin;
}
/**
 * Not supported by this implementation.
 *
 * @throws Exception
 *             always
 */
@Override
public void addColumn() throws Exception
{
throw new Exception("Action not possible");
}
/**
 * Not supported by this implementation.
 *
 * @throws Exception
 *             always
 */
@Override
public void addRow() throws Exception
{
throw new Exception("Action not possible");
}
/**
 * Not supported by this implementation.
 *
 * @throws Exception
 *             always
 */
@Override
public void updateElement() throws Exception
{
throw new Exception("Action not possible");
}
}