package org.archive.format.gzip.zipnum;
import java.io.IOException;
import java.util.logging.Logger;
import org.archive.format.cdx.CDXInputSource;
import org.archive.util.binsearch.SeekableLineReader;
import org.archive.util.binsearch.SortedTextFile;
import org.archive.util.iterator.BoundedStringIterator;
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.iterator.StartBoundedStringIterator;
/**
 * {@link CDXInputSource} that reads CDX lines through a sorted, tab-separated
 * summary file. Summary lines are located in a {@link SortedTextFile} and
 * handed to {@link SummaryBlockIterator} / {@link MultiBlockIterator}, which
 * produce the resulting line iterator.
 *
 * <p>Fields of a summary line as used by this class (zero-based, split on
 * tab): index 1 is compared as the shard/part id, index 2 is parsed as an
 * offset, index 3 as a length and index 4 as a running line count.
 *
 * <p>{@link #init()} must be called after the summary file (and optionally the
 * location file and block loader) have been configured; methods that use the
 * summary dereference it without a null check.
 */
public class ZipNumCluster implements CDXInputSource {

    final static Logger LOGGER = Logger.getLogger(ZipNumCluster.class.getName());

    /** Zero-based tab-separated field indexes of a summary line. */
    private static final int FIELD_PART = 1;
    private static final int FIELD_OFFSET = 2;
    private static final int FIELD_LENGTH = 3;
    private static final int FIELD_LINE_COUNT = 4;

    /** How many bytes before the end of the summary to start looking for its last line. */
    private static final long LAST_LINE_TAIL_BYTES = 100L;

    /** Prefix prepended to part ids; derived lazily from {@link #summaryFile} when not set. */
    private String clusterRoot;

    protected String summaryFile;

    protected SortedTextFile summary;

    protected String locFile;

    protected ZipNumBlockLoader blockLoader;

    protected LocationUpdater locationUpdater = null;

    protected final static boolean DEFAULT_USE_NIO = true;

    protected boolean useNio = DEFAULT_USE_NIO;

    /**
     * Shared iterator that never yields an element. Note that {@code next()}
     * returns {@code null} and {@code remove()} is a no-op rather than throwing.
     */
    protected final static CloseableIterator<String> EMPTY_ITERATOR = new CloseableIterator<String>() {
        @Override
        public boolean hasNext() {
            return false;
        }

        @Override
        public String next() {
            return null;
        }

        @Override
        public void remove() {
            // nothing to remove
        }

        @Override
        public void close() throws IOException {
            // nothing to release
        }
    };

    /** Creates an unconfigured cluster; configure via setters, then call {@link #init()}. */
    public ZipNumCluster() {
    }

    /**
     * Creates a cluster with an explicit root used as the prefix for part paths.
     *
     * @param clusterUri prefix that {@link #getClusterPart(String)} prepends to part ids
     * @throws IOException declared for compatibility; this constructor performs no I/O itself
     */
    public ZipNumCluster(String clusterUri) throws IOException {
        this.clusterRoot = clusterUri;
    }

    /**
     * Opens the summary file (if one is configured), creates a default block
     * loader when none was injected, and creates the location updater when a
     * location file is configured.
     *
     * @throws IOException propagated from the collaborators constructed here
     */
    public void init() throws IOException {
        if (summaryFile != null) {
            this.summary = new SortedTextFile(summaryFile, useNio);
        }

        if (blockLoader == null) {
            this.blockLoader = new ZipNumBlockLoader();
        }

        if (locFile != null) {
            this.locationUpdater = new LocationUpdater(locFile, this.blockLoader);
        }
    }

    /**
     * Reads the line-count field (index 4) of a summary line.
     *
     * @param line tab-separated summary line
     * @return the field narrowed to {@code int}, or -1 when missing or not numeric
     */
    protected static int extractLineCount(String line) {
        return (int) extractLongField(line, FIELD_LINE_COUNT);
    }

    /**
     * Parses one tab-separated field of a line as a {@code long}.
     *
     * @param line  tab-separated line
     * @param index zero-based field index
     * @return the parsed value, or -1 when the field is absent or not a valid long
     */
    protected static long extractLongField(String line, int index) {
        String[] parts = line.split("\t");

        if (parts.length <= index) {
            return -1;
        }

        try {
            return Long.parseLong(parts[index]);
        } catch (NumberFormatException ignored) {
            // Best effort: a non-numeric field is reported as -1, same as a missing one.
            return -1;
        }
    }

    /**
     * Builds the path of a cluster part as {@code clusterRoot + partId + ".gz"}.
     * When no cluster root was given, it is derived once from the summary file
     * path (everything up to and including its last {@code '/'}) and cached.
     *
     * @param partId part identifier
     * @return path of the part file
     */
    public String getClusterPart(String partId) {
        if (clusterRoot == null) {
            int lastSlash = summaryFile.lastIndexOf('/');
            clusterRoot = this.summaryFile.substring(0, lastSlash + 1);
        }

        return clusterRoot + partId + ".gz";
    }

    /**
     * Sums the differences between the line-count fields of consecutive
     * summary lines. A pair whose earlier line count is negative (i.e. could
     * not be parsed) contributes nothing.
     *
     * @param blocks summary lines in order
     * @return accumulated difference, or 0 when fewer than two lines are given
     */
    public int getNumLines(String[] blocks) {
        if (blocks.length < 2) {
            return 0;
        }

        int previous = -1;
        int current = -1;
        int total = 0;

        for (String block : blocks) {
            previous = current;
            current = extractLineCount(block);

            if (previous >= 0) {
                total += (current - previous);
            }
        }

        return total;
    }

    /**
     * Sums the length field (index 3) of the given summary lines. Lines whose
     * length field is missing or unparsable are skipped instead of
     * contributing the -1 sentinel to the total.
     *
     * @param blocks summary lines
     * @return sum of the parsable length fields
     */
    public long getTotalLength(String[] blocks) {
        long total = 0;

        for (String block : blocks) {
            long length = extractLongField(block, FIELD_LENGTH);
            if (length >= 0) {
                total += length;
            }
        }

        return total;
    }

    /**
     * Computes the difference between the line counts found in the summary
     * around the offsets that {@link SortedTextFile#getStartEndOffsets}
     * reports for {@code start} and {@code end}.
     *
     * <p>When no end line can be read at the reported end offset, the count is
     * taken from the last field of the summary's last line instead.
     *
     * @param start start key
     * @param end   end key
     * @return end count minus start count (a missing start line counts as 0)
     * @throws IOException           on read failure
     * @throws NumberFormatException if the last field of the last summary line is not an integer
     */
    public int getNumLines(String start, String end) throws IOException {
        SeekableLineReader slr = null;
        String startLine = null;
        String endLine = null;

        int startCount = 0;
        int endCount = 0;

        try {
            slr = summary.getSLR();

            long[] offsets = summary.getStartEndOffsets(slr, start, end);

            if (offsets[0] > 0) {
                slr.seek(offsets[0]);
                // Discard the line at the offset and use the one that follows it.
                slr.readLine();
                startLine = slr.readLine();
            }

            if (offsets[1] < slr.getSize()) {
                slr.seek(offsets[1]);
                slr.readLine();
                endLine = slr.readLine();
            }

            if (endLine != null) {
                endCount = extractLineCount(endLine);
            } else {
                endCount = readLastLineCount(slr);
            }

            if (startLine != null) {
                startCount = extractLineCount(startLine);
            }
        } finally {
            if (slr != null) {
                slr.close();
            }
        }

        return endCount - startCount;
    }

    /**
     * Reads the trailing field of the last non-empty line of the summary.
     * Starts a bounded distance before the end (never before offset 0) and
     * reads forward so the true last line is used even when lines are short.
     * The field delimiter may be a space or a tab.
     */
    private static int readLastLineCount(SeekableLineReader slr) throws IOException {
        slr.seek(Math.max(0L, slr.getSize() - LAST_LINE_TAIL_BYTES));

        String lastLine = null;
        String line;

        // NOTE(review): assumes readLine() returns null at end of input - confirm.
        while ((line = slr.readLine()) != null) {
            if (!line.isEmpty()) {
                lastLine = line;
            }
        }

        if (lastLine == null) {
            // Empty summary: nothing to count.
            return 0;
        }

        int lastDelimiter = Math.max(lastLine.lastIndexOf(' '), lastLine.lastIndexOf('\t'));
        return Integer.parseInt(lastLine.substring(lastDelimiter + 1));
    }

    /**
     * Experimental size estimate derived from the offset field (index 2) of
     * consecutive summary lines: within the same part (index 1) the offset
     * difference is added; when the part changes only the new offset is added
     * (sizes of any parts in between are not accounted for).
     *
     * @param blocks summary lines in order
     * @return estimated size
     * @throws NumberFormatException if an offset field is not a valid long
     */
    public long getEstimateSplitSize(String[] blocks) {
        String[] parts = null;
        String[] lastParts = null;

        long totalSize = 0;

        for (String block : blocks) {
            lastParts = parts;
            parts = block.split("\t");

            if ((lastParts == null) || (parts.length < 3) || (lastParts.length < 3)) {
                continue;
            }

            long newOffset = Long.parseLong(parts[FIELD_OFFSET]);

            if (parts[FIELD_PART].equals(lastParts[FIELD_PART])) {
                // Same shard: simply subtract the previous offset.
                long lastOffset = Long.parseLong(lastParts[FIELD_OFFSET]);
                totalSize += (newOffset - lastOffset);
            } else {
                //TODO: Compute size of all in between shards
                totalSize += newOffset;
            }
        }

        return totalSize;
    }

    /**
     * Returns summary lines beginning at {@code start}, end-bounded by {@code end}.
     *
     * @param start           key passed to {@link SortedTextFile#getRecordIterator}
     * @param end             end bound; empty or null means unbounded
     * @param inclusive       passed through to the end-bounding iterator
     * @param includePrevLine passed through to {@link SortedTextFile#getRecordIterator}
     * @return iterator over summary lines
     * @throws IOException on read failure
     */
    public CloseableIterator<String> getClusterRange(String start, String end, boolean inclusive, boolean includePrevLine) throws IOException {
        CloseableIterator<String> iter = summary.getRecordIterator(start, includePrevLine);
        return wrapEndIterator(iter, end, inclusive);
    }

    /**
     * Applies {@link #wrapStartIterator} and then {@link #wrapEndIterator} to {@code iter}.
     */
    public CloseableIterator<String> wrapStartEndIterator(CloseableIterator<String> iter, String start, String end, boolean inclusive) {
        return wrapEndIterator(wrapStartIterator(iter, start), end, inclusive);
    }

    /**
     * Wraps {@code iter} in a {@link StartBoundedStringIterator} constructed with {@code start}.
     */
    public static CloseableIterator<String> wrapStartIterator(CloseableIterator<String> iter, String start) {
        return new StartBoundedStringIterator(iter, start);
    }

    /**
     * Wraps {@code iter} in a {@link BoundedStringIterator} constructed with
     * {@code end} and {@code inclusive}. An empty or null {@code end} means no
     * bound, in which case {@code iter} is returned unchanged.
     */
    public static CloseableIterator<String> wrapEndIterator(CloseableIterator<String> iter, String end, boolean inclusive) {
        if ((end == null) || end.isEmpty()) {
            return iter;
        }

        return new BoundedStringIterator(iter, end, inclusive);
    }

    /**
     * Same as the six-argument overload with {@code params == null}.
     */
    public CloseableIterator<String> getCDXIterator(CloseableIterator<String> summaryIterator, String start, String end, int split, int numSplits) {
        return getCDXIterator(summaryIterator, start, end, split, numSplits, null);
    }

    /**
     * Builds the CDX line iterator for one split of a range. The start bound
     * is applied only to the first split ({@code split == 0}) and the
     * (exclusive) end bound only to the last split
     * ({@code split >= numSplits - 1}).
     *
     * @param summaryIterator summary lines for this split
     * @param start           start key
     * @param end             end key; empty or null means unbounded
     * @param split           zero-based index of this split
     * @param numSplits       total number of splits
     * @param params          optional parameters, may be null
     * @return iterator over the resulting lines
     */
    public CloseableIterator<String> getCDXIterator(CloseableIterator<String> summaryIterator, String start, String end, int split, int numSplits, ZipNumParams params) {
        CloseableIterator<String> blocklines = this.getCDXIterator(summaryIterator, params);

        if (split == 0) {
            blocklines = wrapStartIterator(blocklines, start);
        }

        if (split >= (numSplits - 1)) {
            blocklines = wrapEndIterator(blocklines, end, false);
        }

        return blocklines;
    }

    /**
     * Returns {@code key + "!"}, used by this class as the end bound for lines
     * of the form {@code key<space>...}.
     */
    public static String endKey(String key) {
        return key + "!";
    }

    /**
     * Returns the lines of the last block for {@code key}, start-bounded by {@code key}.
     *
     * @throws IOException on read failure
     */
    public CloseableIterator<String> getLastBlockCDXLineIterator(String key) throws IOException {
        // the next line after last key<space> is key! so this will return last key<space> block
        CloseableIterator<String> summaryIter = summary.getRecordIteratorLT(endKey(key));
        return wrapStartIterator(getCDXIterator(summaryIter), key);
    }

    /**
     * End-bounds {@code source} for a prefix query: exclusively at
     * {@link #endKey(String) endKey(prefix)} when {@code exact}, otherwise
     * inclusively at {@code prefix} itself.
     */
    public static CloseableIterator<String> wrapPrefix(CloseableIterator<String> source, String prefix, boolean exact) {
        if (exact) {
            return wrapEndIterator(source, endKey(prefix), false);
        }

        return wrapEndIterator(source, prefix, true);
    }

    /**
     * Builds the CDX line iterator for a key/prefix lookup.
     *
     * <p>Returns {@link #EMPTY_ITERATOR} when a location updater is present
     * and its {@code dateRangeCheck(key)} returns false. Otherwise the summary
     * iterator is optionally wrapped for timestamp dedup and block buffering
     * (both only when {@code params} is non-null and requests them), bounded
     * by {@link #wrapPrefix}, and the resulting lines are start-bounded by
     * {@code start}.
     *
     * @param key    key used to position in the summary
     * @param start  start/prefix bound applied to the output
     * @param exact  see {@link #wrapPrefix}
     * @param params optional parameters, may be null
     * @return iterator over the resulting lines
     * @throws IOException on read failure
     */
    public CloseableIterator<String> getCDXIterator(String key, String start, boolean exact, ZipNumParams params) throws IOException {
        if ((locationUpdater != null) && !locationUpdater.dateRangeCheck(key)) {
            return EMPTY_ITERATOR;
        }

        CloseableIterator<String> summaryIter = summary.getRecordIteratorLT(key);

        // params is optional (other overloads pass null), so guard every access.
        if ((params != null) && (params.getTimestampDedupLength() > 0)) {
            summaryIter = new TimestampDedupIterator(summaryIter, params.getTimestampDedupLength());
        }

        if (blockLoader.isBufferFully() && (params != null) && (params.getMaxBlocks() > 0)) {
            LineBufferingIterator lineBufferIter = new LineBufferingIterator(summaryIter, params.getMaxBlocks());
            lineBufferIter.bufferInput();
            summaryIter = lineBufferIter;
        }

        summaryIter = wrapPrefix(summaryIter, start, exact);

        return wrapStartIterator(getCDXIterator(summaryIter, params), start);
    }

    /**
     * Builds the CDX line iterator positioned via {@code key} in the summary
     * and start-bounded by {@code key}; no end bound is applied.
     *
     * @throws IOException on read failure
     */
    public CloseableIterator<String> getCDXIterator(String key, ZipNumParams params) throws IOException {
        CloseableIterator<String> summaryIter = summary.getRecordIteratorLT(key);
        return wrapStartIterator(getCDXIterator(summaryIter, params), key);
    }

    /**
     * Chains a {@link SummaryBlockIterator} over {@code summaryIterator} into
     * a {@link MultiBlockIterator}.
     *
     * @param summaryIterator summary lines
     * @param params          optional parameters, may be null
     */
    public CloseableIterator<String> getCDXIterator(CloseableIterator<String> summaryIterator, ZipNumParams params) {
        SummaryBlockIterator blockIter = new SummaryBlockIterator(summaryIterator, this, params);
        return new MultiBlockIterator(blockIter);
    }

    /**
     * Same as {@link #getCDXIterator(CloseableIterator, ZipNumParams)} with null params.
     */
    public CloseableIterator<String> getCDXIterator(CloseableIterator<String> summaryIterator) {
        return getCDXIterator(summaryIterator, null);
    }

    /** Sets the summary file path; takes effect on the next {@link #init()}. */
    public void setSummaryFile(String summaryFile) {
        this.summaryFile = summaryFile;
    }

    public String getSummaryFile() {
        return summaryFile;
    }

    /** Returns the opened summary, or null if {@link #init()} has not opened one. */
    public SortedTextFile getSummary() {
        return summary;
    }

    public ZipNumBlockLoader getBlockLoader() {
        return blockLoader;
    }

    public void setBlockLoader(ZipNumBlockLoader blockLoader) {
        this.blockLoader = blockLoader;
    }

    public boolean isUseNio() {
        return useNio;
    }

    /** Sets the flag passed to {@link SortedTextFile} when {@link #init()} opens the summary. */
    public void setUseNio(boolean useNio) {
        this.useNio = useNio;
    }

    public String getLocFile() {
        return locFile;
    }

    /** Sets the location file path; takes effect on the next {@link #init()}. */
    public void setLocFile(String locFile) {
        this.locFile = locFile;
    }
}