SuccinctStream.java example

Explorer

succinct-master
- core
  - src
    - main
      - java
        edu
        berkeley
        cs
        succinct
        StorageMode.java
        SuccinctCore.java
        SuccinctFile.java
        SuccinctIndexedFile.java
        buffers
        SuccinctBuffer.java
        SuccinctFileBuffer.java
        SuccinctIndexedFileBuffer.java
        examples
        Construct.java
        SuccinctShell.java
        regex
        RegExMatch.java
        SuccinctRegEx.java
        SuccinctRegExMatch.java
        executor
        RegExExecutor.java
        SuccinctBwdRegExExecutor.java
        SuccinctFwdRegExExecutor.java
        SuccinctRegExExecutor.java
        parser
        RegEx.java
        RegExBlank.java
        RegExConcat.java
        RegExParser.java
        RegExParsingException.java
        RegExPrimitive.java
        RegExRepeat.java
        RegExRepeatType.java
        RegExType.java
        RegExUnion.java
        RegExWildcard.java
        util
        BitUtils.java
        CommonUtils.java
        DictionaryUtils.java
        EliasGamma.java
        IOUtils.java
        Source.java
        SuccinctConfiguration.java
        SuccinctConstants.java
        bitmap
        BMArray.java
        BitMap.java
        buffer
        ThreadSafeByteBuffer.java
        ThreadSafeIntBuffer.java
        ThreadSafeLongBuffer.java
        serops
        ArrayOps.java
        BMArrayOps.java
        BitMapOps.java
        BitVectorOps.java
        DeltaEncodedIntVectorOps.java
        DictionaryOps.java
        IntVectorOps.java
        WaveletTreeOps.java
        container
        BasicArray.java
        ByteArray.java
        CharArray.java
        IntArray.java
        IntArrayList.java
        Pair.java
        Range.java
        dictionary
        Dictionary.java
        Tables.java
        iterator
        SearchIterator.java
        SearchRecordIterator.java
        suffixarray
        DivSufSort.java
        QSufSort.java
        SAIS.java
        vector
        BitVector.java
        DeltaEncodedIntVector.java
        IntVector.java
        wavelettree
        WaveletTree.java
    - test
      - java
        edu
        berkeley
        cs
        succinct
        SuccinctCoreTest.java
        SuccinctFileTest.java
        SuccinctIndexedFileTest.java
        TestUtils.java
        buffers
        SuccinctBuffer2Test.java
        SuccinctBufferTest.java
        SuccinctFileBuffer2Test.java
        SuccinctFileBuffer3Test.java
        SuccinctFileBufferTest.java
        SuccinctIndexedFileBuffer2Test.java
        SuccinctIndexedFileBufferTest.java
        regex
        executor
        RegExExecutorTest.java
        SuccinctBwdRegExExecutorTest.java
        SuccinctFwdRegExExecutorTest.java
        parser
        RegExParserTest.java
        util
        BitUtilsTest.java
        CommonUtilsTest.java
        DictionaryUtilsTest.java
        bitmap
        BMArrayTest.java
        BitMapTest.java
        buffer
        serops
        ArrayOpsTest.java
        BMArrayOpsTest.java
        BitMapOpsTest.java
        BitVectorOpsTest.java
        DeltaEncodedIntVectorOpsTest.java
        DictionaryOpsTest.java
        IntVectorOpsTest.java
        WaveletTreeOpsTest.java
        container
        IntArrayListTest.java
        dictionary
        DictionaryTest.java
        suffixarray
        DivSufSortTest.java
        QSufSortTest.java
        SAISTest.java
        vector
        BitVectorTest.java
        DeltaEncodedIntVectorTest.java
        IntVectorTest.java
        wavelettree
        WaveletTreeTest.java
- serde
  - src
    - main
      - java
        edu
        berkeley
        cs
        succinct
        DataType.java
        DeserializationException.java
        PrimitiveDeserializer.java
        PrimitiveSerializer.java
        SerializationException.java
        block
        BlockSerializer.java
        ByteArrayBlockSerializer.java
        json
        FieldMapping.java
        JsonBlockSerializer.java
        object
        deserializer
        JsonDeserializer.java
        ObjectDeserializer.java
        serializer
        ObjectSerializer.java
    - test
      - java
        edu
        berkeley
        cs
        succinct
        PrimitiveSerializerTest.java
        block
        ByteArrayBlockSerializerTest.java
        JsonBlockSerializerTest.java
        object
        deserializer
        JsonDeserializerTest.java
- spark
  - src
    - main
      - java
        edu
        berkeley
        cs
        succinct
        SuccinctTable.java
        buffers
        SuccinctTableBuffer.java
        streams
        SuccinctFileStream.java
        SuccinctIndexedFileStream.java
        SuccinctStream.java
        SuccinctTableStream.java
        util
        stream
        ByteArrayStream.java
        DeltaEncodedIntStream.java
        IntArrayStream.java
        LongArrayStream.java
        RandomAccessByteStream.java
        RandomAccessIntStream.java
        RandomAccessLongStream.java
        WaveletTreeStream.java
        serops
        ArrayOps.java
        BMArrayOps.java
        BitMapOps.java
        BitVectorOps.java
        DictionaryOps.java
        IntVectorOps.java
    - test
      - java
        edu
        berkeley
        cs
        succinct
        SuccinctTableTest.java
        buffers
        SuccinctTableBufferTest.java
        streams
        SuccinctFileStream2Test.java
        SuccinctFileStream3Test.java
        SuccinctFileStreamTest.java
        SuccinctIndexedFileStream2Test.java
        SuccinctIndexedFileStreamTest.java
        SuccinctStream2Test.java
        SuccinctStreamTest.java
        SuccinctTableStreamTest.java
        util
        stream
        ByteArrayStreamTest.java
        IntArrayStreamTest.java
        LongArrayStreamTest.java
        RandomAccessByteStreamTest.java
        RandomAccessIntStreamTest.java
        RandomAccessLongStreamTest.java
        TestUtils.java
        serops
        ArrayOpsTest.java
        BMArrayOpsTest.java
        BitMapOpsTest.java
        DictionaryOpsTest.java

package edu.berkeley.cs.succinct.streams;

import edu.berkeley.cs.succinct.SuccinctCore;
import edu.berkeley.cs.succinct.util.BitUtils;
import edu.berkeley.cs.succinct.util.CommonUtils;
import edu.berkeley.cs.succinct.util.SuccinctConstants;
import edu.berkeley.cs.succinct.util.stream.DeltaEncodedIntStream;
import edu.berkeley.cs.succinct.util.stream.IntArrayStream;
import edu.berkeley.cs.succinct.util.stream.LongArrayStream;
import edu.berkeley.cs.succinct.util.stream.serops.ArrayOps;
import edu.berkeley.cs.succinct.util.stream.serops.IntVectorOps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

/**
 * Stream based implementation for Succinct algorithms.
 *
 * <p>The Succinct data structures are not loaded into memory; instead each of them (sampled SA,
 * sampled ISA, column offsets and the NPA columns) is addressed through a stream wrapper that
 * remembers the offset of the structure inside the underlying file.
 *
 * <p>File layout, in the order the constructor reads it:
 * <ol>
 * <li>six ints: original size, SA sampling rate, ISA sampling rate, NPA sampling rate,
 * sample bit width, alphabet size;</li>
 * <li>the alphabet, one int per symbol;</li>
 * <li>the sampled SA, followed by the sampled ISA;</li>
 * <li>the column offsets, one int per symbol;</li>
 * <li>for every symbol, an int holding the column size followed by the column data.</li>
 * </ol>
 */
public class SuccinctStream extends SuccinctCore {

  protected transient LongArrayStream sa;
  protected transient LongArrayStream isa;
  protected transient IntArrayStream columnoffsets;
  protected transient DeltaEncodedIntStream[] columns;

  protected transient FSDataInputStream originalStream;
  // Stream position just past the last NPA column, recorded by the constructor.
  protected transient long endOfCoreStream;

  private transient Configuration conf;

  /**
   * Constructor to map a file containing Succinct data structures via stream.
   *
   * <p>If anything goes wrong while the file is being parsed, the stream opened for it is closed
   * before the exception propagates, so a failed construction does not leak a file handle.
   *
   * @param filePath Path of the file.
   * @param conf     Configuration for the filesystem.
   * @throws IOException If the file cannot be opened, read or positioned.
   */
  public SuccinctStream(Path filePath, Configuration conf) throws IOException {
    this.conf = conf;

    FSDataInputStream is = getStream(filePath);
    boolean mapped = false;
    try {
      setOriginalSize(is.readInt());
      setSamplingRateSA(is.readInt());
      setSamplingRateISA(is.readInt());
      setSamplingRateNPA(is.readInt());
      setSampleBitWidth(is.readInt());
      setAlphabetSize(is.readInt());

      // Read alphabet
      alphabet = new int[getAlphabetSize()];
      for (int i = 0; i < getAlphabetSize(); i++) {
        alphabet[i] = is.readInt();
      }

      // Compute the size (in bytes) of the sampled SA
      int totalSampledBitsSA =
        CommonUtils.numBlocks(getOriginalSize(), getSamplingRateSA()) * getSampleBitWidth();
      int saSize = BitUtils.bitsToBlocks64(totalSampledBitsSA) * SuccinctConstants.LONG_SIZE_BYTES;

      // Map SA, then skip over it
      sa = new LongArrayStream(is, is.getPos(), saSize);
      is.seek(is.getPos() + saSize);

      // Compute the size (in bytes) of the sampled ISA
      int totalSampledBitsISA =
        CommonUtils.numBlocks(getOriginalSize(), getSamplingRateISA()) * getSampleBitWidth();
      int isaSize =
        BitUtils.bitsToBlocks64(totalSampledBitsISA) * SuccinctConstants.LONG_SIZE_BYTES;

      // Map ISA, then skip over it
      isa = new LongArrayStream(is, is.getPos(), isaSize);
      is.seek(is.getPos() + isaSize);

      // Map columnoffsets, then skip over them
      int columnoffsetsSize = getAlphabetSize() * SuccinctConstants.INT_SIZE_BYTES;
      columnoffsets = new IntArrayStream(is, is.getPos(), columnoffsetsSize);
      is.seek(is.getPos() + columnoffsetsSize);

      // Map every NPA column; each one is prefixed with its size
      columns = new DeltaEncodedIntStream[getAlphabetSize()];
      for (int i = 0; i < getAlphabetSize(); i++) {
        int columnSize = is.readInt();
        assert columnSize != 0;
        columns[i] = new DeltaEncodedIntStream(is, is.getPos());
        is.seek(is.getPos() + columnSize);
      }

      endOfCoreStream = is.getPos();

      is.seek(0);
      this.originalStream = is;
      mapped = true;
    } finally {
      if (!mapped) {
        closeAfterFailedMapping(is);
      }
    }
  }

  /**
   * Constructor to map a file containing Succinct data structures via stream, using a freshly
   * created {@link Configuration}.
   *
   * @param filePath Path of the file.
   * @throws IOException If the file cannot be opened, read or positioned.
   */
  public SuccinctStream(Path filePath) throws IOException {
    this(filePath, new Configuration());
  }

  /**
   * Closes a stream whose mapping was aborted, without masking the exception that aborted it.
   *
   * @param stream Stream to close; may be null.
   */
  private static void closeAfterFailedMapping(FSDataInputStream stream) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException ignored) {
      // The failure that interrupted the mapping is already propagating and is the one the
      // caller needs to see; a secondary failure while closing must not replace it.
    }
  }

  /**
   * Rejects indices that do not lie in {@code [0, getOriginalSize() - 1]}.
   *
   * @param structure Name of the structure being indexed; used as the message prefix.
   * @param i         Index to validate.
   * @throws ArrayIndexOutOfBoundsException If the index lies outside the valid range.
   */
  private void ensureWithinOriginalSize(String structure, long i) {
    if (i > getOriginalSize() - 1 || i < 0) {
      throw new ArrayIndexOutOfBoundsException(
        structure + " index out of bounds: i = " + i + " originalSize = " + getOriginalSize());
    }
  }

  /**
   * Opens a new FSDataInputStream on the provided file, using the filesystem resolved from the
   * path's URI and this instance's configuration.
   *
   * @param path Path of the file.
   * @return A FSDataInputStream.
   * @throws IOException If the filesystem cannot be obtained or the file cannot be opened.
   */
  protected FSDataInputStream getStream(Path path) throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    return fs.open(path);
  }

  /**
   * Get the size (in bytes) of Succinct data structures (compressed).
   *
   * <p>NOTE: this implementation does not compute the size and always reports 0.
   *
   * @return Always 0.
   */
  @Override public int getCoreSize() {
    return 0;
  }

  /**
   * Lookup NPA at specified index.
   *
   * <p>The column holding the index is located through a rank query on the column offsets; the
   * value is then read from that column at the index relative to the column's start.
   *
   * @param i Index into NPA.
   * @return Value of NPA at specified index.
   * @throws ArrayIndexOutOfBoundsException If the index is outside {@code [0, originalSize - 1]}.
   * @throws RuntimeException If reading from the underlying stream fails.
   */
  @Override public long lookupNPA(long i) {

    ensureWithinOriginalSize("NPA", i);

    try {
      int colId = ArrayOps.getRank1(columnoffsets, 0, getAlphabetSize(), (int) i) - 1;

      assert colId < getAlphabetSize();
      assert columnoffsets.get(colId) <= i;

      return (long) columns[colId].get((int) (i - columnoffsets.get(colId)));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup SA at specified index.
   *
   * <p>Only every {@code getSamplingRateSA()}-th entry is stored. Starting from the requested
   * index, NPA is followed until a stored entry is reached; the number of hops taken is then
   * subtracted from the stored value, wrapping around the original size if needed.
   *
   * @param i Index into SA.
   * @return Value of SA at specified index.
   * @throws ArrayIndexOutOfBoundsException If the index is outside {@code [0, originalSize - 1]}.
   * @throws RuntimeException If reading from the underlying stream fails.
   */
  @Override public long lookupSA(long i) {

    ensureWithinOriginalSize("SA", i);

    try {
      int j = 0;
      while (i % getSamplingRateSA() != 0) {
        i = lookupNPA(i);
        j++;
      }
      long saVal = IntVectorOps.get(sa, (int) (i / getSamplingRateSA()), getSampleBitWidth());

      // Undo the j hops; wrap around when the sampled value is smaller than the hop count.
      if (saVal < j) {
        return getOriginalSize() - (j - saVal);
      }
      return saVal - j;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup ISA at specified index.
   *
   * <p>Only every {@code getSamplingRateISA()}-th entry is stored. The stored entry at or before
   * the requested index is read and NPA is then applied once for every position between that
   * sample and the requested index.
   *
   * @param i Index into ISA.
   * @return Value of ISA at specified index.
   * @throws ArrayIndexOutOfBoundsException If the index is outside {@code [0, originalSize - 1]}.
   * @throws RuntimeException If reading from the underlying stream fails.
   */
  @Override public long lookupISA(long i) {

    ensureWithinOriginalSize("ISA", i);

    try {

      int sampleIdx = (int) (i / getSamplingRateISA());
      int pos = IntVectorOps.get(isa, sampleIdx, getSampleBitWidth());
      // Remaining distance from the sampled position to the requested one.
      i -= (sampleIdx * getSamplingRateISA());
      while (i-- != 0) {
        pos = (int) lookupNPA(pos);
      }
      return pos;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Lookup up the inverted alphabet map at specified index.
   *
   * <p>The column holding the index is located through a rank query on the column offsets and
   * the alphabet entry for that column is returned.
   *
   * @param i Index into inverted alphabet map
   * @return Value of inverted alphabet map at specified index.
   * @throws ArrayIndexOutOfBoundsException If the index is outside {@code [0, originalSize - 1]}.
   * @throws RuntimeException If reading from the underlying stream fails.
   */
  @Override public int lookupC(long i) {

    ensureWithinOriginalSize("C", i);

    try {
      int idx = ArrayOps.getRank1(columnoffsets, 0, getAlphabetSize(), (int) i) - 1;
      return alphabet[idx];
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Binary Search for a value within NPA.
   *
   * <p>Presumably the NPA values in {@code [startIdx, endIdx]} are expected to be in ascending
   * order, as for any binary search; verify against callers.
   *
   * @param val      Value to be searched.
   * @param startIdx Starting index into NPA.
   * @param endIdx   Ending index into NPA.
   * @param flag     Whether to search for left or the right boundary.
   * @return The index at which {@code val} was found; if it was not found, the final upper bound
   * of the search when {@code flag} is true, otherwise the final lower bound.
   */
  @Override public long binSearchNPA(long val, long startIdx, long endIdx, boolean flag) {

    long sp = startIdx;
    long ep = endIdx;
    long m;

    while (sp <= ep) {
      m = (sp + ep) / 2;

      long npaVal;
      npaVal = lookupNPA(m);

      if (npaVal == val) {
        return m;
      } else if (val < npaVal) {
        ep = m - 1;
      } else {
        sp = m + 1;
      }
    }

    return flag ? ep : sp;
  }

  /**
   * Close the underlying stream. Does nothing if no stream is attached to this instance.
   *
   * @throws IOException If closing the stream fails.
   */
  void close() throws IOException {
    if (originalStream != null) {
      originalStream.close();
    }
  }
}