package org.archive.hadoop.cdx;

import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.util.iterator.AbstractPeekableIterator;
import org.archive.util.iterator.BoundedStringIterator;
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.iterator.StartBoundedStringIterator;

/**
 * Iterator over the lines of a {@link CDXCluster} that fall within a
 * start/end key range.
 *
 * <p>The cluster supplies an iterator of block descriptor lines for the
 * range; each block is loaded and unpacked lazily, one at a time, as the
 * caller consumes lines. The first block is wrapped in a
 * {@link StartBoundedStringIterator} using {@code start}, and every block is
 * wrapped in a {@link BoundedStringIterator} using {@code end}.
 * NOTE(review): whether the bounds are inclusive or exclusive is determined
 * by those iterator classes and is not visible here.
 */
public class ClusterRange extends AbstractPeekableIterator<String> {

    private static final Logger LOG =
            Logger.getLogger(ClusterRange.class.getName());

    /** True until the first block has been successfully loaded and wrapped. */
    private boolean isFirst;
    /** True once the block iterator has been drained; no more lines follow. */
    private boolean done;

    private final String start;
    private final String end;
    private final CDXCluster cluster;
    /** Block descriptor lines for the requested range. */
    private final CloseableIterator<String> blocks;
    /** Lines of the block currently being consumed, or null if none. */
    private Iterator<String> current;

    /**
     * Creates a range over {@code cluster} and obtains the iterator of block
     * descriptors covering it.
     *
     * @param cluster the cluster to read blocks from
     * @param start   start key of the range
     * @param end     end key of the range
     * @throws IOException if the cluster fails to produce the block iterator
     */
    public ClusterRange(CDXCluster cluster, String start, String end)
            throws IOException {
        this.cluster = cluster;
        this.start = start;
        this.end = end;
        this.blocks = cluster.getRangeBlockIterator(start, end);
        this.done = false;
        this.isFirst = true;
    }

    /**
     * Returns the next line in the range, or {@code null} when exhausted.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         loading a block
     */
    @Override
    public String getNextInner() {
        try {
            return getNextWrapper();
        } catch (IOException e) {
            // The iterator contract has no checked exceptions; keep the cause.
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the next line in the range, loading further blocks as needed.
     *
     * @return the next line, or {@code null} once all blocks are consumed
     * @throws IOException if loading a block fails
     */
    public String getNextWrapper() throws IOException {
        if (done) {
            return null;
        }
        if (current != null && current.hasNext()) {
            return current.next();
        }
        // Current block (if any) is used up: advance until a block yields a
        // line. A block may yield nothing once the bounds are applied.
        while (blocks.hasNext()) {
            current = openBlock(blocks.next());
            if (current.hasNext()) {
                return current.next();
            }
        }
        done = true;
        // Nothing more will be read; drop the last block's iterator.
        current = null;
        return null;
    }

    /** Closes the underlying block descriptor iterator. */
    @Override
    public void close() throws IOException {
        blocks.close();
    }

    /**
     * Loads the block described by {@code blockLine} and returns its lines
     * restricted to this range.
     */
    private Iterator<String> openBlock(String blockLine) throws IOException {
        ZipNumBlock block = new ZipNumBlock(blockLine);
        byte[] compressed = cluster.loadBlock(block);
        // Only pay for message formatting when FINE logging is enabled.
        if (LOG.isLoggable(Level.FINE)) {
            LOG.fine(String.format("Loaded block:%s (%d)(%d)",
                    block.shard, block.start, block.length));
        }
        ZipNumBlockIterator zbi = new ZipNumBlockIterator(compressed);
        Iterator<String> itr = zbi.iterator();
        if (isFirst) {
            // Cleared only after a successful load, so a failed first load
            // does not lose the start bound on a later attempt.
            isFirst = false;
            return new BoundedStringIterator(
                    new StartBoundedStringIterator(itr, start), end);
        }
        return new BoundedStringIterator(itr, end);
    }
}