/**
* GeDBIT.type.SequenceTable 2006.07.24
*
* Copyright Information:
*
* Change Log:
* 2006.07.24: Added, by Willard
*/
package GeDBIT.type;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import GeDBIT.dist.SequenceFragmentMetric;
import GeDBIT.index.algorithms.PartitionMethod;
import GeDBIT.index.algorithms.PivotSelectionMethod;
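/**
 * Abstract base class for tables whose rows are fixed-length fragments
 * enumerated from an array of {@link Sequence} objects.
 *
 * @author Willard
 */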
public abstract class SequenceTable extends Table {
    private static final long serialVersionUID = -1519446228237674948L;

    /** Alphabet obtained from the metric's weight matrix at construction. */
    protected Alphabet alphabet;

    /** Fragment length handed to {@code Sequence.numFragments}; always &gt; 0. */
    protected int fragmentLength;

    /**
     * Source sequences. This class only reads the array; it is never assigned
     * here, so {@link #loadData(BufferedReader, int)} implementations have to
     * populate it before returning.
     */
    protected Sequence[] sequences;

    /**
     * Parallel to {@code originalRowIDs}: for each row, the index of the
     * fragment within its sequence (0 .. numFragments-1).
     */
    protected int[] fragmentOffsets;

    /**
     * Loads the sequences from {@code fileName} and builds one row per
     * fragment, up to {@code maxSize} rows.
     *
     * <p>
     * Note: {@link #loadData(BufferedReader, int)} is invoked from this
     * constructor, i.e. before any subclass field initializers or constructor
     * bodies have run.
     *
     * @param fileName
     *            path of the file to read; also forwarded to the {@code Table}
     *            constructor
     * @param indexPrefix
     *            forwarded unchanged to the {@code Table} constructor
     * @param maxSize
     *            upper bound on the number of fragment rows created; a value
     *            &lt;= 0 yields no rows
     * @param metric
     *            metric whose weight matrix supplies the alphabet
     * @param fragmentLength
     *            fragment length, must be greater than zero
     * @throws IOException
     *             if the file cannot be opened or closing the reader fails
     * @throws IllegalArgumentException
     *             if {@code fragmentLength <= 0}
     */
    protected SequenceTable(String fileName, String indexPrefix, int maxSize,
            SequenceFragmentMetric metric, int fragmentLength)
            throws IOException {
        super(fileName, indexPrefix, maxSize, metric);
        if (fragmentLength <= 0)
            throw new IllegalArgumentException(
                    "fragment length must be greater than zero!");
        this.alphabet = metric.getWeightMatrix().getAlphabet();
        this.fragmentLength = fragmentLength;

        BufferedReader reader = new BufferedReader(
                new java.io.FileReader(fileName));
        try {
            loadData(reader, maxSize);
        } finally {
            // Release the file handle even when loading fails. Closing an
            // already-closed reader is a no-op, so this is safe if the
            // subclass closed it itself.
            reader.close();
        }
        initFragmentList(maxSize);
    }

    /**
     * Reads the sequence data. The constructor closes {@code reader} as soon
     * as this method returns, so everything needed must be read eagerly.
     *
     * @param reader
     *            buffered reader over the source file
     * @param maxSize
     *            the {@code maxSize} value given to the constructor
     */
    protected abstract void loadData(BufferedReader reader, int maxSize);

    /**
     * Builds the fragment rows: fills {@code originalRowIDs} (sequence index
     * per row), {@link #fragmentOffsets} (fragment index per row) and
     * {@code data} (one {@link Fragment} per row), creating at most
     * {@code size} rows in sequence order.
     *
     * @param size
     *            maximum number of rows to create
     */
    private void initFragmentList(int size) {
        // First pass: number of rows = total fragment count, capped at size.
        int count = 0;
        for (int i = 0; i < sequences.length && count < size; i++) {
            int numFragments = sequences[i].numFragments(fragmentLength);
            if (numFragments > 0) {
                count += Math.min(numFragments, size - count);
            }
        }

        ArrayList<Fragment> fragmentList = new ArrayList<Fragment>(count);
        originalRowIDs = new int[count];
        fragmentOffsets = new int[count];

        // Second pass: fill the row arrays and create the fragments.
        int row = 0;
        for (int i = 0; i < sequences.length && row < count; i++) {
            int numFragments = sequences[i].numFragments(fragmentLength);
            for (int j = 0; j < numFragments && row < count; j++) {
                // The arrays are written before the Fragment is constructed,
                // in case its constructor looks the row up in this table.
                this.originalRowIDs[row] = i;
                this.fragmentOffsets[row] = j;
                fragmentList.add(new Fragment(this, row));
                row++;
            }
        }
        fragmentList.trimToSize();
        data = fragmentList;
    }

    // TODO make better
    /**
     * Sorts the data and collapses each run of equal data points into its
     * first element. The surviving element gets its row ID set to the start of
     * the run in the sorted order and, for runs longer than one, its row-ID
     * length set to the run length. {@code originalRowIDs} and
     * {@link #fragmentOffsets} are rebuilt in sorted order so that positions
     * {@code [rowID, rowID + length)} describe all members of the run.
     *
     * <p>
     * An empty table is left untouched.
     *
     * @see GeDBIT.type.Table#compressData()
     */
    public void compressData() {
        if (data.isEmpty()) {
            // Nothing to sort or merge; data.get(0) below would throw.
            return;
        }
        // Sorting brings equal data points next to each other.
        Collections.sort(data);

        final int dataSize = data.size();
        ArrayList<IndexObject> compressedData = new ArrayList<IndexObject>(
                dataSize);
        int[] sortedRowIDs = new int[dataSize];
        int[] sortedOffsets = new int[dataSize];

        IndexObject runHead = data.get(0);
        int runStart = 0;
        for (int i = 1; i < dataSize; i++) {
            IndexObject current = data.get(i);
            if (!runHead.equals(current)) {
                recordDuplicateRun(runHead, runStart, i - runStart,
                        sortedRowIDs, sortedOffsets);
                compressedData.add(runHead);
                runHead = current;
                runStart = i;
            }
        }
        // Flush the final run, which the loop never closes.
        recordDuplicateRun(runHead, runStart, dataSize - runStart,
                sortedRowIDs, sortedOffsets);
        compressedData.add(runHead);
        compressedData.trimToSize();

        // System.out.println("original size: " + dataSize +
        // " compressed data size: " + compressedData.size());
        data = compressedData;
        originalRowIDs = sortedRowIDs;
        fragmentOffsets = sortedOffsets;
    }

    /**
     * Copies the row information of one run of equal data points into the
     * sorted arrays and re-targets the run's first element at that run.
     *
     * @param runHead
     *            first element of the run; kept as its representative
     * @param start
     *            position of the run's first element in the sorted data
     * @param length
     *            number of elements in the run (&gt;= 1)
     * @param sortedRowIDs
     *            destination for the sequence indices, in sorted order
     * @param sortedOffsets
     *            destination for the fragment indices, in sorted order
     */
    private void recordDuplicateRun(IndexObject runHead, int start,
            int length, int[] sortedRowIDs, int[] sortedOffsets) {
        // Old row IDs are read before runHead's own row ID is overwritten.
        for (int k = start; k < start + length; k++) {
            int rowID = data.get(k).getRowID();
            sortedRowIDs[k] = originalRowIDs[rowID];
            sortedOffsets[k] = fragmentOffsets[rowID];
        }
        runHead.setRowID(start);
        if (length > 1) {
            // Singletons keep whatever row-ID length they already had.
            runHead.setRowIDLength(length);
        }
    }

    /**
     * @return the fragment length this table was constructed with
     */
    public int getFragmentLength() {
        return fragmentLength;
    }

    /**
     * @param rowID
     *            index into {@link #fragmentOffsets}
     * @return the fragment index stored for that row
     */
    public int getFragmentOffset(int rowID) {
        return fragmentOffsets[rowID];
    }

    /**
     * Builds the index file name from the source file name and the index
     * parameters, in the form
     * {@code <source>-<psm>-<numPivots>-<pm>-<fanout>-MLS-<maxLeafSize>-MPL-<maxPathLength>-FL-<fragmentLength>},
     * followed by {@code -S-<maxSize>} when {@code maxSize > 0} and
     * {@code -b-} when {@code bucket} is set.
     *
     * @see GeDBIT.type.Table#createIndexFileName(GeDBIT.index.algorithms.
     *      PivotSelectionMethod, int, GeDBIT.index.algorithms.PartitionMethod,
     *      int, int, int, boolean)
     */
    protected String createIndexFileName(PivotSelectionMethod psm,
            int numPivots, PartitionMethod pm, int singlePivotFanout,
            int maxLeafSize, int maxPathLength, boolean bucket) {
        // IncrementalSelection gets a fixed short name instead of toString().
        String psmName;
        if (psm instanceof GeDBIT.index.algorithms.IncrementalSelection)
            psmName = "incremental";
        else
            psmName = psm.toString();

        StringBuilder myFileName = new StringBuilder();
        myFileName.append(sourceFileName).append("-").append(psmName)
                .append("-").append(numPivots).append("-").append(pm)
                .append("-").append(singlePivotFanout).append("-MLS-")
                .append(maxLeafSize).append("-MPL-").append(maxPathLength)
                .append("-FL-").append(fragmentLength);
        if (maxSize > 0) {
            myFileName.append("-S-").append(maxSize);
        }
        if (bucket) {
            myFileName.append("-b-");
        }
        return myFileName.toString();
    }
}