/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.resourceindex.ziplines;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.util.io.RuntimeIOException;
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.zip.OpenJDK7GZIPInputStream;
import org.archive.wayback.util.ByteOp;

/**
 * Iterates over the text lines of a list of {@link ZiplinedBlock}s. Blocks
 * are opened lazily, one after another, through
 * {@link ZiplinedBlock#readBlock()}; when the reader for one block runs out
 * of lines it is closed and the next block is opened.
 *
 * @author brad
 */
public class ZiplinesChunkIterator implements CloseableIterator<String> {
	private static final Logger LOGGER = Logger.getLogger(
			ZiplinesChunkIterator.class.getName());

	/** Reader over the block currently being consumed, or null if none. */
	private BufferedReader br = null;
	/** Source of the blocks that have not been opened yet. */
	private Iterator<ZiplinedBlock> blockItr = null;
	/** Line already read by hasNext() but not yet handed out by next(). */
	private String cachedNext = null;
	/** Flag stored on behalf of callers; this class only stores it. */
	private boolean truncated = false;

	/**
	 * @param blocks which should be fetched and unzipped, one after another
	 */
	public ZiplinesChunkIterator(List<ZiplinedBlock> blocks) {
		LOGGER.info("initialized with " + blocks.size() + " blocks");
		blockItr = blocks.iterator();
	}

	/**
	 * Reads ahead until a line is available or every block is exhausted.
	 * A read failure inside a block is logged and the remainder of that
	 * block is skipped, so iteration continues with the next block.
	 *
	 * @return true if a subsequent call to {@link #next()} will return a line
	 * @throws RuntimeIOException if the next block cannot be opened; the
	 *         underlying IOException is attached as the cause
	 * @see java.util.Iterator#hasNext()
	 */
	public boolean hasNext() {
		while(cachedNext == null) {
			if(br == null) {
				// no open block: move on to the next one, if any
				if(!blockItr.hasNext()) {
					return false;
				}
				try {
					br = blockItr.next().readBlock();
				} catch (IOException e) {
					// only the no-arg constructor is known to exist, so the
					// cause is attached afterwards instead of being dropped
					RuntimeIOException wrapped = new RuntimeIOException();
					wrapped.initCause(e);
					throw wrapped;
				}
				continue;
			}
			try {
				cachedNext = br.readLine();
			} catch (IOException e) {
				// best effort: give up on this block, keep going with the rest
				LOGGER.log(Level.WARNING,
						"error reading block, skipping remainder of it", e);
			}
			if(cachedNext == null) {
				// block exhausted (or failed): release it before moving on
				closeCurrentReader();
			}
		}
		return true;
	}

	/**
	 * Closes and forgets the reader for the current block. A failure to
	 * close is logged and otherwise ignored so that iteration can continue.
	 */
	private void closeCurrentReader() {
		if(br == null) {
			return;
		}
		try {
			br.close();
		} catch (IOException e) {
			LOGGER.log(Level.WARNING, "error closing block reader", e);
		} finally {
			br = null;
		}
	}

	/**
	 * Returns the next line, reading ahead if {@link #hasNext()} has not been
	 * called since the previous line was returned.
	 *
	 * @return the next line from the blocks
	 * @throws NoSuchElementException if no more lines are available
	 * @see java.util.Iterator#next()
	 */
	public String next() {
		if(!hasNext()) {
			throw new NoSuchElementException();
		}
		String tmp = cachedNext;
		cachedNext = null;
		return tmp;
	}

	/**
	 * Not supported.
	 *
	 * @throws UnsupportedOperationException always
	 * @see java.util.Iterator#remove()
	 */
	public void remove() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Closes the reader for the block currently being consumed, if any.
	 * Calling this more than once is harmless.
	 *
	 * @throws IOException if closing the current reader fails
	 * @see java.io.Closeable#close()
	 */
	public void close() throws IOException {
		if(br != null) {
			try {
				br.close();
			} finally {
				// never keep a closed reader around for hasNext() to read
				br = null;
			}
		}
	}

	/**
	 * Command line check of a ziplines file: for every
	 * {@link ZiplinedBlock#BLOCK_SIZE} sized block, prints the file name, the
	 * block offset and the first line found in that block. Exits with status
	 * 1 on a usage error, an unreadable block, or an IOException.
	 *
	 * @param args a single argument, the path to the ziplines file
	 */
	public static void main(String[] args) {
		if(args.length != 1) {
			System.err.println("Usage: ZIPLINES_PATH");
			System.exit(1);
		}
		File f = new File(args[0]);
		long size = f.length();
		long numBlocks = size / ZiplinedBlock.BLOCK_SIZE;
		long size2 = numBlocks * ZiplinedBlock.BLOCK_SIZE;
		if(size != size2) {
			System.err.println("File size of " + args[0] + " is not a mulitple"
					+ " of " + ZiplinedBlock.BLOCK_SIZE);
		}
		try {
			RandomAccessFile raf = new RandomAccessFile(f, "r");
			try {
				// long index so the offset multiplication cannot overflow int
				for(long i = 0; i < numBlocks; i++) {
					long offset = i * ZiplinedBlock.BLOCK_SIZE;
					raf.seek(offset);
					// The reader is deliberately not closed here: it wraps
					// raf's own file descriptor, which later blocks still need.
					BufferedReader br = new BufferedReader(new InputStreamReader(
							new OpenJDK7GZIPInputStream(
									new FileInputStream(raf.getFD())),
							ByteOp.UTF8));
					String line = br.readLine();
					if(line == null) {
						System.err.println("Bad block at " + offset + " in "
								+ args[0]);
						System.exit(1);
					}
					System.out.println(args[0] + " " + offset + " " + line);
				}
			} finally {
				raf.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		}
	}

	/**
	 * @return the truncated
	 */
	public boolean isTruncated() {
		return truncated;
	}

	/**
	 * @param truncated the truncated to set
	 */
	public void setTruncated(boolean truncated) {
		this.truncated = truncated;
	}
}