package edu.berkeley.cs.succinct.streams;
import edu.berkeley.cs.succinct.SuccinctCore;
import edu.berkeley.cs.succinct.util.BitUtils;
import edu.berkeley.cs.succinct.util.CommonUtils;
import edu.berkeley.cs.succinct.util.SuccinctConstants;
import edu.berkeley.cs.succinct.util.stream.DeltaEncodedIntStream;
import edu.berkeley.cs.succinct.util.stream.IntArrayStream;
import edu.berkeley.cs.succinct.util.stream.LongArrayStream;
import edu.berkeley.cs.succinct.util.stream.serops.ArrayOps;
import edu.berkeley.cs.succinct.util.stream.serops.IntVectorOps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
 * Stream based implementation for Succinct algorithms.
 *
 * <p>The constructor reads the header fields and the alphabet from the file eagerly, and
 * records stream-backed views (sampled SA, sampled ISA, column offsets and one delta-encoded
 * NPA column per alphabet symbol) positioned at the corresponding regions of the file. All
 * views share the single {@link FSDataInputStream} opened by the constructor; call
 * {@link #close()} to release it.
 */
public class SuccinctStream extends SuccinctCore {

  /** Stream view over the sampled suffix array region of the file. */
  protected transient LongArrayStream sa;

  /** Stream view over the sampled inverse suffix array region of the file. */
  protected transient LongArrayStream isa;

  /** Stream view over the per-symbol column offsets; one int per alphabet entry. */
  protected transient IntArrayStream columnoffsets;

  /** One delta-encoded NPA column per alphabet entry. */
  protected transient DeltaEncodedIntStream[] columns;

  /** The underlying stream shared by all of the views above. */
  protected transient FSDataInputStream originalStream;

  /** Stream position immediately after the last NPA column. */
  protected transient long endOfCoreStream;

  /** Filesystem configuration used by {@link #getStream(Path)}. */
  private transient Configuration conf;

  /**
   * Constructor to map a file containing Succinct data structures via stream.
   *
   * <p>If reading the file layout fails for any reason, the stream opened for it is closed
   * before the failure is propagated, so no file handle is leaked.
   *
   * @param filePath Path of the file.
   * @param conf Configuration for the filesystem.
   * @throws IOException If the file cannot be opened or its contents cannot be read.
   */
  public SuccinctStream(Path filePath, Configuration conf) throws IOException {
    this.conf = conf;

    // NOTE(review): getStream is overridable and is invoked from the constructor; overriding
    // implementations must not depend on subclass state initialized after this point.
    FSDataInputStream is = getStream(filePath);
    boolean initialized = false;
    try {
      // Header fields, in on-disk order.
      setOriginalSize(is.readInt());
      setSamplingRateSA(is.readInt());
      setSamplingRateISA(is.readInt());
      setSamplingRateNPA(is.readInt());
      setSampleBitWidth(is.readInt());
      setAlphabetSize(is.readInt());

      // Read alphabet
      alphabet = new int[getAlphabetSize()];
      for (int i = 0; i < getAlphabetSize(); i++) {
        alphabet[i] = is.readInt();
      }

      // Size (in bytes) of the sampled SA region.
      // NOTE(review): this is int arithmetic and may overflow for very large inputs with a
      // small sampling rate -- confirm against the helper signatures before widening.
      int totalSampledBitsSA =
        CommonUtils.numBlocks(getOriginalSize(), getSamplingRateSA()) * getSampleBitWidth();
      int saSize = BitUtils.bitsToBlocks64(totalSampledBitsSA) * SuccinctConstants.LONG_SIZE_BYTES;

      // Map SA, then skip over its region.
      sa = new LongArrayStream(is, is.getPos(), saSize);
      is.seek(is.getPos() + saSize);

      // Size (in bytes) of the sampled ISA region.
      int totalSampledBitsISA =
        CommonUtils.numBlocks(getOriginalSize(), getSamplingRateISA()) * getSampleBitWidth();
      int isaSize =
        BitUtils.bitsToBlocks64(totalSampledBitsISA) * SuccinctConstants.LONG_SIZE_BYTES;

      // Map ISA, then skip over its region.
      isa = new LongArrayStream(is, is.getPos(), isaSize);
      is.seek(is.getPos() + isaSize);

      // Map columnoffsets, then skip over its region.
      int columnoffsetsSize = getAlphabetSize() * SuccinctConstants.INT_SIZE_BYTES;
      columnoffsets = new IntArrayStream(is, is.getPos(), columnoffsetsSize);
      is.seek(is.getPos() + columnoffsetsSize);

      // Each column is stored as a size prefix followed by that many bytes of column data.
      columns = new DeltaEncodedIntStream[getAlphabetSize()];
      for (int i = 0; i < getAlphabetSize(); i++) {
        int columnSize = is.readInt();
        assert columnSize != 0;
        columns[i] = new DeltaEncodedIntStream(is, is.getPos());
        is.seek(is.getPos() + columnSize);
      }

      endOfCoreStream = is.getPos();
      is.seek(0);
      this.originalStream = is;
      initialized = true;
    } finally {
      if (!initialized) {
        // Construction failed: nobody else holds a reference to the stream, so release it here.
        try {
          is.close();
        } catch (IOException ignored) {
          // Deliberately ignored so that the original failure is the one that propagates.
        }
      }
    }
  }

  /**
   * Constructor to map a file containing Succinct data structures via stream, using a
   * default-constructed {@link Configuration}.
   *
   * @param filePath Path of the file.
   * @throws IOException If the file cannot be opened or its contents cannot be read.
   */
  public SuccinctStream(Path filePath) throws IOException {
    this(filePath, new Configuration());
  }

  /**
   * Opens a new FSDataInputStream on the provided file.
   *
   * @param path Path of the file.
   * @return A FSDataInputStream.
   * @throws IOException If the filesystem cannot be obtained or the file cannot be opened.
   */
  protected FSDataInputStream getStream(Path path) throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    return fs.open(path);
  }

  /**
   * Get the size (in bytes) of Succinct data structures (compressed).
   *
   * <p>This implementation does not compute a size and always returns 0.
   *
   * @return Size (in bytes) of Succinct data structures (compressed).
   */
  @Override public int getCoreSize() {
    return 0;
  }

  /**
   * Lookup NPA at specified index.
   *
   * @param i Index into NPA.
   * @return Value of NPA at specified index.
   * @throws ArrayIndexOutOfBoundsException If {@code i} is negative or not less than the
   *         original size.
   * @throws RuntimeException Wrapping any {@link IOException} raised by the underlying stream.
   */
  @Override public long lookupNPA(long i) {
    if (i > getOriginalSize() - 1 || i < 0) {
      throw new ArrayIndexOutOfBoundsException(
        "NPA index out of bounds: i = " + i + " originalSize = " + getOriginalSize());
    }

    try {
      // The column is selected by ranking i against the column offsets.
      int colId = ArrayOps.getRank1(columnoffsets, 0, getAlphabetSize(), (int) i) - 1;

      assert colId < getAlphabetSize();
      assert columnoffsets.get(colId) <= i;

      // Index within the column is relative to the column's starting offset.
      return (long) columns[colId].get((int) (i - columnoffsets.get(colId)));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup SA at specified index.
   *
   * <p>Follows NPA from {@code i} until an index that is a multiple of the SA sampling rate is
   * reached, reads the sample stored for it, and subtracts the number of NPA steps taken
   * (wrapping around the original size when the sample is smaller than the step count).
   *
   * @param i Index into SA.
   * @return Value of SA at specified index.
   * @throws ArrayIndexOutOfBoundsException If {@code i} is negative or not less than the
   *         original size.
   * @throws RuntimeException Wrapping any {@link IOException} raised by the underlying stream.
   */
  @Override public long lookupSA(long i) {
    if (i > getOriginalSize() - 1 || i < 0) {
      throw new ArrayIndexOutOfBoundsException(
        "SA index out of bounds: i = " + i + " originalSize = " + getOriginalSize());
    }

    try {
      int j = 0;
      while (i % getSamplingRateSA() != 0) {
        i = lookupNPA(i);
        j++;
      }
      long saVal = IntVectorOps.get(sa, (int) (i / getSamplingRateSA()), getSampleBitWidth());

      if (saVal < j) {
        return getOriginalSize() - (j - saVal);
      }
      return saVal - j;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup ISA at specified index.
   *
   * <p>Reads the ISA sample at or before {@code i} and then applies NPA once for every
   * position between that sample and {@code i}.
   *
   * @param i Index into ISA.
   * @return Value of ISA at specified index.
   * @throws ArrayIndexOutOfBoundsException If {@code i} is negative or not less than the
   *         original size.
   * @throws RuntimeException Wrapping any {@link IOException} raised by the underlying stream.
   */
  @Override public long lookupISA(long i) {
    if (i > getOriginalSize() - 1 || i < 0) {
      throw new ArrayIndexOutOfBoundsException(
        "ISA index out of bounds: i = " + i + " originalSize = " + getOriginalSize());
    }

    try {
      int sampleIdx = (int) (i / getSamplingRateISA());
      int pos = IntVectorOps.get(isa, sampleIdx, getSampleBitWidth());

      // After this, i holds the number of NPA steps remaining from the sampled position.
      i -= (sampleIdx * getSamplingRateISA());
      while (i-- != 0) {
        pos = (int) lookupNPA(pos);
      }
      return pos;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup the inverted alphabet map at specified index.
   *
   * @param i Index into inverted alphabet map
   * @return Value of inverted alphabet map at specified index.
   * @throws ArrayIndexOutOfBoundsException If {@code i} is negative or not less than the
   *         original size.
   * @throws RuntimeException Wrapping any {@link IOException} raised by the underlying stream.
   */
  @Override public int lookupC(long i) {
    if (i > getOriginalSize() - 1 || i < 0) {
      throw new ArrayIndexOutOfBoundsException(
        "C index out of bounds: i = " + i + " originalSize = " + getOriginalSize());
    }

    try {
      int idx = ArrayOps.getRank1(columnoffsets, 0, getAlphabetSize(), (int) i) - 1;
      return alphabet[idx];
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Binary Search for a value within NPA.
   *
   * <p>If an index whose NPA value equals {@code val} is found, that index is returned.
   * Otherwise the search bounds have crossed, and the upper bound is returned when
   * {@code flag} is {@code true}, the lower bound when it is {@code false}.
   *
   * @param val Value to be searched.
   * @param startIdx Starting index into NPA.
   * @param endIdx Ending index into NPA.
   * @param flag Whether to search for left or the right boundary.
   * @return Search result as an index into the NPA.
   */
  @Override public long binSearchNPA(long val, long startIdx, long endIdx, boolean flag) {
    long sp = startIdx;
    long ep = endIdx;
    long m;

    while (sp <= ep) {
      m = (sp + ep) / 2;

      long npaVal;
      npaVal = lookupNPA(m);

      if (npaVal == val) {
        return m;
      } else if (val < npaVal) {
        ep = m - 1;
      } else {
        sp = m + 1;
      }
    }

    return flag ? ep : sp;
  }

  /**
   * Close the underlying stream shared by all of the stream-backed structures.
   *
   * @throws IOException If closing the stream fails.
   */
  void close() throws IOException {
    originalStream.close();
  }
}