package edu.berkeley.cs.succinct.streams; import edu.berkeley.cs.succinct.SuccinctFile; import edu.berkeley.cs.succinct.regex.RegExMatch; import edu.berkeley.cs.succinct.regex.SuccinctRegEx; import edu.berkeley.cs.succinct.regex.parser.RegExParsingException; import edu.berkeley.cs.succinct.util.Source; import edu.berkeley.cs.succinct.util.SuccinctConstants; import edu.berkeley.cs.succinct.util.container.Range; import edu.berkeley.cs.succinct.util.iterator.SearchIterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.Set; public class SuccinctFileStream extends SuccinctStream implements SuccinctFile { protected transient long endOfFileStream; /** * Constructor to map a file containing Succinct data structures via stream. * * @param filePath Path of the file. * @param conf Configuration for the filesystem. * @throws IOException */ public SuccinctFileStream(Path filePath, Configuration conf) throws IOException { super(filePath, conf); endOfFileStream = endOfCoreStream; } /** * Constructor to map a file containing Succinct data structures via stream * * @param filePath Path of the file. * @throws IOException */ public SuccinctFileStream(Path filePath) throws IOException { this(filePath, new Configuration()); } /** * Get the alphabet for the succinct file. * * @return The alphabet for the succinct file. */ @Override public int[] getAlphabet() { return alphabet; } /** * Get the size of the uncompressed file. * * @return The size of the uncompressed file. */ @Override public int getSize() { return getOriginalSize(); } @Override public int getCompressedSize() { return getCoreSize(); } /** * Get the character at specified index into succinct file * * @param i Index into succinct file. * @return The character at specified index. */ @Override public char charAt(long i) { return (char) lookupC(lookupISA(i)); } /** * Extract data of specified length from Succinct data structures at specified index. * * @param offset Index into original input to start extracting at. * @param len Length of data to be extracted. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted data. */ @Override public String extract(long offset, int len, ExtractContext ctx) { StringBuilder out = new StringBuilder(len); long s = lookupISA(offset); for (int k = 0; k < len && offset + k < getOriginalSize(); k++) { int nextChar = lookupC(s); if (nextChar < Character.MIN_VALUE || nextChar > Character.MAX_VALUE) break; out.append((char) nextChar); s = lookupNPA(s); } if (ctx != null) ctx.marker = s; return out.toString(); } /** * Extract data of specified length from Succinct data structures at specified index. * * @param offset Index into original input to start extracting at. * @param length Length of data to be extracted. * @return Extracted data. */ @Override public String extract(long offset, int length) { return extract(offset, length, null); } /** * Extract data of specified length from Succinct data structures. * * @param ctx Extract context containing the end marker of previous extract. * @param len Length of data to be extracted. * @return Extracted data. */ @Override public String extract(ExtractContext ctx, int len) { StringBuilder out = new StringBuilder(len); for (int k = 0; k < len; k++) { int nextChar = lookupC(ctx.marker); if (nextChar < Character.MIN_VALUE || nextChar > Character.MAX_VALUE) break; out.append((char) nextChar); ctx.marker = lookupNPA(ctx.marker); } return out.toString(); } /** * Extract data from Succinct data structures at specified index until specified delimiter. * * @param offset Index into original input to start extracting at. * @param delim Delimiter at which to stop extracting. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted data. */ @Override public String extractUntil(long offset, int delim, ExtractContext ctx) { StringBuilder out = new StringBuilder(); long s = lookupISA(offset); do { int nextChar = lookupC(s); if (nextChar == delim || nextChar == SuccinctConstants.EOF) break; out.append((char) nextChar); s = lookupNPA(s); } while (true); if (ctx != null) ctx.marker = s; return out.toString(); } /** * Extract data from Succinct data structures at specified index until specified delimiter. * * @param offset Index into original input to start extracting at. * @param delim Delimiter at which to stop extracting. * @return Extracted data. */ @Override public String extractUntil(long offset, int delim) { return extractUntil(offset, delim, null); } /** * Extract data from Succinct data structures until specified delimiter. * * @param ctx Extract context containing the end marker of previous extract. * @param delim Delimiter at which to stop extracting. * @return Extracted data. */ @Override public String extractUntil(ExtractContext ctx, int delim) { StringBuilder out = new StringBuilder(); do { int nextChar = lookupC(ctx.marker); if (nextChar == delim || nextChar == SuccinctConstants.EOF) break; out.append((char) nextChar); ctx.marker = lookupNPA(ctx.marker); } while (true); return out.toString(); } /** * Extract data of specified length from Succinct data structures at specified index. * * @param offset Index into original input to start extracting at. * @param len Length of data to be extracted. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted data. */ @Override public byte[] extractBytes(long offset, int len, ExtractContext ctx) { ByteArrayOutputStream out = new ByteArrayOutputStream(len); long s = lookupISA(offset); for (int k = 0; k < len && offset + k < getOriginalSize(); k++) { int nextByte = lookupC(s); if (nextByte < Byte.MIN_VALUE || nextByte > Byte.MAX_VALUE) break; out.write(nextByte); s = lookupNPA(s); } if (ctx != null) ctx.marker = s; return out.toByteArray(); } /** * Extract data of specified length from Succinct data structures at specified index. * * @param offset Index into original input to start extracting at. * @param length Length of data to be extracted. * @return Extracted data. */ @Override public byte[] extractBytes(long offset, int length) { return extractBytes(offset, length, null); } /** * Extract data of specified length from Succinct data structures. * * @param ctx Extract context containing the end marker of previous extract. * @param len Length of data to be extracted. * @return Extracted data. */ @Override public byte[] extractBytes(ExtractContext ctx, int len) { ByteArrayOutputStream out = new ByteArrayOutputStream(len); for (int k = 0; k < len; k++) { int nextByte = lookupC(ctx.marker); if (nextByte < Byte.MIN_VALUE || nextByte > Byte.MAX_VALUE) break; out.write(nextByte); ctx.marker = lookupNPA(ctx.marker); } return out.toByteArray(); } /** * Extract data from Succinct data structures at specified index until specified delimiter. * * @param offset Index into original input to start extracting at. * @param delim Delimiter at which to stop extracting. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted data. */ @Override public byte[] extractBytesUntil(long offset, int delim, ExtractContext ctx) { ByteArrayOutputStream out = new ByteArrayOutputStream(); long s = lookupISA(offset); do { int nextByte = lookupC(s); if (nextByte == delim || nextByte == SuccinctConstants.EOF) break; out.write(nextByte); s = lookupNPA(s); } while (true); if (ctx != null) ctx.marker = s; return out.toByteArray(); } /** * Extract data from Succinct data structures at specified index until specified delimiter. * * @param offset Index into original input to start extracting at. * @param delim Delimiter at which to stop extracting. * @return Extracted data. */ @Override public byte[] extractBytesUntil(long offset, int delim) { return extractBytesUntil(offset, delim, null); } /** * Extract data from Succinct data structures until specified delimiter. * * @param ctx Extract context containing the end marker of previous extract. * @param delim Delimiter at which to stop extracting. * @return Extracted data. */ @Override public byte[] extractBytesUntil(ExtractContext ctx, int delim) { ByteArrayOutputStream out = new ByteArrayOutputStream(); do { int nextByte = lookupC(ctx.marker); if (nextByte == delim || nextByte == SuccinctConstants.EOF) break; out.write(nextByte); ctx.marker = lookupNPA(ctx.marker); } while (true); return out.toByteArray(); } /** * Extract short integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted short integer. */ @Override public short extractShort(int offset, ExtractContext ctx) { long s = lookupISA(offset); int byte0 = lookupC(s); s = lookupNPA(s); int byte1 = lookupC(s); if (ctx != null) ctx.marker = lookupNPA(ctx.marker); return (short) ((byte0 << 8) | (byte1 & 0xFF)); } /** * Extract short integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @return Extracted short integer. */ @Override public short extractShort(int offset) { return extractShort(offset, null); } /** * Extract short integer at specified offset. * * @param ctx Extract context containing the end marker of previous extract. * @return Extracted short integer. */ @Override public short extractShort(ExtractContext ctx) { int byte0 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte1 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); return (short) ((byte0 << 8) | (byte1 & 0xFF)); } /** * Extract integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted integer. */ @Override public int extractInt(int offset, ExtractContext ctx) { long s = lookupISA(offset); int byte0 = lookupC(s); s = lookupNPA(s); int byte1 = lookupC(s); s = lookupNPA(s); int byte2 = lookupC(s); s = lookupNPA(s); int byte3 = lookupC(s); if (ctx != null) ctx.marker = lookupNPA(s); return (byte0 << 24) | ((byte1 & 0xFF) << 16) | ((byte2 & 0xFF) << 8) | (byte3 & 0xFF); } /** * Extract integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @return Extracted integer. */ @Override public int extractInt(int offset) { return extractInt(offset, null); } /** * Extract integer at specified offset. * * @param ctx Extract context containing the end marker of previous extract. * @return Extracted integer. */ @Override public int extractInt(ExtractContext ctx) { int byte0 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte1 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte2 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte3 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); return (byte0 << 24) | ((byte1 & 0xFF) << 16) | ((byte2 & 0xFF) << 8) | (byte3 & 0xFF); } /** * Extract long integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @param ctx Extract context to be populated with end marker of extract. * @return Extracted long integer. */ @Override public long extractLong(int offset, ExtractContext ctx) { long s = lookupISA(offset); int byte0 = lookupC(s); s = lookupNPA(s); int byte1 = lookupC(s); s = lookupNPA(s); int byte2 = lookupC(s); s = lookupNPA(s); int byte3 = lookupC(s); s = lookupNPA(s); int byte4 = lookupC(s); s = lookupNPA(s); int byte5 = lookupC(s); s = lookupNPA(s); int byte6 = lookupC(s); s = lookupNPA(s); int byte7 = lookupC(s); if (ctx != null) ctx.marker = lookupNPA(s); return ((long) byte0 << 56) | ((long) (byte1 & 0xFF) << 48) | ((long) (byte2 & 0xFF) << 40) | ( (long) (byte3 & 0xFF) << 32) | ((long) (byte4 & 0xFF) << 24) | ((byte5 & 0xFF) << 16) | ( (byte6 & 0xFF) << 8) | ((byte7 & 0xFF)); } /** * Extract long integer at specified offset. * * @param offset Offset into the original input to start extracting at. * @return Extracted long integer. */ @Override public long extractLong(int offset) { return extractLong(offset, null); } /** * Extract long integer at specified offset. * * @param ctx Extract context containing the end marker of previous extract. * @return Extracted long integer. */ @Override public long extractLong(ExtractContext ctx) { int byte0 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte1 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte2 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte3 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte4 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte5 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte6 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); int byte7 = lookupC(ctx.marker); ctx.marker = lookupNPA(ctx.marker); return ((long) byte0 << 56) | ((long) (byte1 & 0xFF) << 48) | ((long) (byte2 & 0xFF) << 40) | ( (long) (byte3 & 0xFF) << 32) | ((long) (byte4 & 0xFF) << 24) | ((byte5 & 0xFF) << 16) | ( (byte6 & 0xFF) << 8) | ((byte7 & 0xFF)); } /** * Perform a range search to obtain SA range between two given queries. * * @param buf1 The beginning of the range. * @param buf2 The end of the range. * @return The range into SA. */ @Override public Range rangeSearch(char[] buf1, char[] buf2) { return new Range(fwdSearch(buf1).begin(), fwdSearch(buf2).end()); } /** * Perform a range search to obtain SA range between two given queries. * * @param buf1 The beginning of the range. * @param buf2 The end of the range. * @return The range into SA. */ @Override public Range rangeSearch(byte[] buf1, byte[] buf2) { return new Range(fwdSearch(buf1).begin(), fwdSearch(buf2).end()); } /** * Perform a range search to obtain SA range between two given queries. * * @param buf1 The beginning of the range. * @param buf2 The end of the range. * @return The range into SA. */ @Override public Range rangeSearch(Source buf1, Source buf2) { return new Range(fwdSearch(buf1).begin(), fwdSearch(buf2).end()); } /** * Perform backward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range bwdSearch(Source buf) { Range range = new Range(0L, -1L); int m = buf.length(); long c1, c2; int pos = findCharacter(buf.get(m - 1)); if (pos >= 0) { try { range.first = columnoffsets.get(pos); range.second = ((pos + 1) == getAlphabetSize() ? getOriginalSize() : columnoffsets.get(pos + 1)) - 1; } catch (IOException e) { throw new RuntimeException(e); } } else { return new Range(0L, -1L); } for (int i = m - 2; i >= 0; i--) { pos = findCharacter(buf.get(i)); if (pos >= 0) { try { c1 = columnoffsets.get(pos); c2 = ((pos + 1) == getAlphabetSize() ? getOriginalSize() : columnoffsets.get(pos + 1)) - 1; } catch (IOException e) { throw new RuntimeException(e); } } else { return new Range(0L, -1L); } if (c1 > c2) { return new Range(0L, -1L); } range.first = binSearchNPA(range.first, c1, c2, false); range.second = binSearchNPA(range.second, c1, c2, true); if (range.first > range.second) { return new Range(0L, -1L); } } return range; } /** * Perform backward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range bwdSearch(final byte[] buf) { return bwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }); } /** * Perform backward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range bwdSearch(final char[] buf) { return bwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }); } /** * Continue backward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @return Range into SA. */ @Override public Range continueBwdSearch(Source buf, Range range) { if (range.empty()) { return range; } Range newRange = new Range(range.first, range.second); int m = buf.length(); long c1, c2; for (int i = m - 1; i >= 0; i--) { int pos = findCharacter(buf.get(i)); if (pos >= 0) { try { c1 = columnoffsets.get(pos); c2 = ((pos + 1) == getAlphabetSize() ? getOriginalSize() : columnoffsets.get(pos + 1)) - 1; } catch (IOException e) { throw new RuntimeException(e); } } else { return new Range(0L, -1L); } if (c1 > c2) { return new Range(0L, -1L); } newRange.first = binSearchNPA(newRange.first, c1, c2, false); newRange.second = binSearchNPA(newRange.second, c1, c2, true); if (newRange.first > newRange.second) { return new Range(0L, -1L); } } return newRange; } /** * Continue backward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @return Range into SA. */ @Override public Range continueBwdSearch(final byte[] buf, Range range) { return continueBwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, range); } /** * Continue backward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @return Range into SA. */ @Override public Range continueBwdSearch(final char[] buf, Range range) { return continueBwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, range); } /** * Compare entire buffer with input starting at specified index. * * @param buf The buffer to compare with. * @param i The index into input. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(Source buf, int i) { int j = 0; do { int c = lookupC(i); int b = buf.get(j); if (b < c) { return -1; } else if (b > c) { return 1; } i = (int) lookupNPA(i); j++; } while (j < buf.length()); return 0; } /** * Compare entire buffer with input starting at specified index. * * @param buf The buffer to compare with. * @param i The index into input. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(final byte[] buf, int i) { return compare(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, i); } /** * Compare entire buffer with input starting at specified index. * * @param buf The buffer to compare with. * @param i The index into input. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(final char[] buf, int i) { return compare(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, i); } /** * Compare entire buffer with input starting at specified index and offset * into buffer. * * @param buf The buffer to compare with. * @param i The index into input. * @param offset Offset into buffer. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(Source buf, int i, int offset) { int j = 0; while (offset != 0) { i = (int) lookupNPA(i); offset--; } do { int c = lookupC(i); int b = buf.get(j); if (b < c) { return -1; } else if (b > c) { return 1; } i = (int) lookupNPA(i); j++; } while (j < buf.length()); return 0; } /** * Compare entire buffer with input starting at specified index and offset * into buffer. * * @param buf The buffer to compare with. * @param i The index into input. * @param offset Offset into buffer. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(final byte[] buf, int i, int offset) { return compare(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, i, offset); } /** * Compare entire buffer with input starting at specified index and offset * into buffer. * * @param buf The buffer to compare with. * @param i The index into input. * @param offset Offset into buffer. * @return -1 if buf is smaller, 0 if equal and 1 if buf is greater. */ @Override public int compare(final char[] buf, int i, int offset) { return compare(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, i, offset); } /** * Perform forward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range fwdSearch(Source buf) { int st = getOriginalSize() - 1; int sp = 0; int s; while (sp < st) { s = (sp + st) / 2; if (compare(buf, s) > 0) { sp = s + 1; } else { st = s; } } int et = getOriginalSize() - 1; int ep = sp - 1; int e; while (ep < et) { e = (int) Math.ceil((double) (ep + et) / 2); if (compare(buf, e) == 0) { ep = e; } else { et = e - 1; } } return new Range(sp, ep); } /** * Perform forward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range fwdSearch(final byte[] buf) { return fwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }); } /** * Perform forward search to obtain SA range for a query. * * @param buf Input query. * @return Range into SA. */ @Override public Range fwdSearch(final char[] buf) { return fwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }); } /** * Continue forward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @param offset Offset into input query. * @return Range into SA. */ @Override public Range continueFwdSearch(Source buf, Range range, int offset) { if (buf.length() == 0 || range.empty()) { return range; } int st = (int) range.second; int sp = (int) range.first; int s; while (sp < st) { s = (sp + st) / 2; if (compare(buf, s, offset) > 0) { sp = sp + 1; } else { st = s; } } int et = (int) range.second; int ep = sp - 1; int e; while (ep < et) { e = (int) Math.ceil((double) (ep + et) / 2); if (compare(buf, e, offset) == 0) { ep = e; } else { et = e - 1; } } return new Range(sp, ep); } /** * Continue forward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @param offset Offset into input query. * @return Range into SA. */ @Override public Range continueFwdSearch(final byte[] buf, Range range, int offset) { return continueFwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, range, offset); } /** * Continue forward search on query to obtain SA range. * * @param buf Input query. * @param range Range to start from. * @param offset Offset into input query. * @return Range into SA. */ @Override public Range continueFwdSearch(final char[] buf, Range range, int offset) { return continueFwdSearch(new Source() { @Override public int length() { return buf.length; } @Override public int get(int i) { return buf[i]; } }, range, offset); } /** * Get count of pattern occurrences in original input. * * @param query Input query. * @return Count of occurrences. */ @Override public long count(Source query) { Range range = bwdSearch(query); return range.second - range.first + 1; } /** * Converts Succinct index (i.e., Compressed Suffix Array index) to file offset. * * @param i Compressed Suffix Array index. * @return File offset. */ @Override public Long succinctIndexToOffset(long i) { return lookupSA(i); } /** * Get count of pattern occurrences in original input. * * @param query Input query. * @return Count of occurrences. */ @Override public long count(final byte[] query) { return count(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } /** * Get count of pattern occurrences in original input. * * @param query Input query. * @return Count of occurrences. */ @Override public long count(final char[] query) { return count(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } @Override public Iterator<Long> searchIterator(Source query) { Range range = bwdSearch(query); return new SearchIterator(this, range); } /** * Search for locations of pattern occurrences in original input. * * @param query Input query. * @return All locations of pattern occurrences in original input. */ @Override public Iterator<Long> searchIterator(final byte[] query) { return searchIterator(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } /** * Search for locations of pattern occurrences in original input. * * @param query Input query. * @return All locations of pattern occurrences in original input. */ @Override public Iterator<Long> searchIterator(final char[] query) { return searchIterator(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } /** * Translate range into SA to recordIds in file. * * @param range Range into SA. * @return Offsets corresponding to recordIds. */ @Override public Long[] rangeToOffsets(Range range) { if (range.empty()) { return new Long[0]; } Long[] offsets = new Long[(int) range.size()]; for (long i = 0; i < range.size(); i++) { offsets[((int) i)] = lookupSA(range.begin() + i); } return offsets; } /** * Get all locations of pattern occurrences in original input. * * @param query Input query. * @return All locations of pattern occurrences in original input. */ @Override public Long[] search(Source query) { return rangeToOffsets(bwdSearch(query)); } /** * Get all locations of pattern occurrences in original input. * * @param query Input query. * @return All locations of pattern occurrences in original input. */ @Override public Long[] search(final byte[] query) { return search(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } /** * Get all locations of pattern occurrences in original input. * * @param query Input query. * @return All locations of pattern occurrences in original input. */ @Override public Long[] search(final char[] query) { return search(new Source() { @Override public int length() { return query.length; } @Override public int get(int i) { return query[i]; } }); } /** * Check if the two offsets belong to the same record. This is always true for the * SuccinctFileBuffer. * * @param firstOffset The first offset. * @param secondOffset The second offset. * @return True if the two offsets belong to the same record, false otherwise. */ @Override public boolean sameRecord(long firstOffset, long secondOffset) { return true; } /** * Performs regular expression search for an input expression using Succinct data-structures. * * @param query Regular expression pattern to be matched. (UTF-8 encoded) * @return All locations and lengths of matching patterns in original input. * @throws RegExParsingException Throws parse exception if query string cannot be parsed */ @Override public Set<RegExMatch> regexSearch(String query) throws RegExParsingException { return new SuccinctRegEx(this, query).compute(); } /** * Reads Succinct data structures from a DataInputStream. * * @param is Stream to read data structures from. * @throws IOException Throws exception if input stream is bad */ @Override public void readFromStream(DataInputStream is) throws IOException { throw new UnsupportedOperationException("Cannot read SuccinctStream from another stream."); } /** * Write Succinct data structures to a DataOutputStream. * * @param os Output stream to write data to. * @throws IOException Throws exception if output stream is bad */ @Override public void writeToStream(DataOutputStream os) throws IOException { byte[] buffer = new byte[1024]; int len; while ((len = originalStream.read(buffer)) != -1) { os.write(buffer, 0, len); } originalStream.seek(0); } }