/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.util.flatfile;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.util.Comparator;
import java.util.Iterator;

import org.archive.util.iterator.CloseableIterator;
import org.archive.wayback.resourceindex.cdx.CDXIndex;
import org.archive.wayback.util.ByteOp;
import org.archive.wayback.util.CompositeSortedIterator;

/**
 * Wrapper around a (sorted) text File which allows binary searching, returning
 * Iterators that allow scanning forwards and backwards thru the file starting
 * from a particular prefix.
 *
 * <p>The binary search reads lines with {@link RandomAccessFile#readLine()},
 * which maps each byte to one char. NOTE(review): keys containing non-ASCII
 * characters may therefore compare differently from the UTF-8 decoded lines
 * the iterators return -- confirm against the data actually stored.
 *
 * @author brad
 * @version $Date$, $Revision$
 */
public class FlatFile {
	// Not used by this class (it does not implement Serializable); presumably
	// left over from an earlier version -- kept to avoid an unrelated change.
	private static final long serialVersionUID = 6174187801001601557L;

	/** Granularity, in bytes, of the block-level binary search. */
	private static final int BLOCK_SIZE = 8192;

	/** Offset recorded by the most recent positioned-iterator request. */
	private long lastMatchOffset;

	/** Underlying file, or null until a path has been set. */
	protected File file = null;

	/**
	 * Creates a FlatFile with no path; call {@link #setPath(String)} before
	 * use.
	 */
	public FlatFile() {
	}

	/**
	 * @param parent directory containing the file
	 * @param child name of the file within parent
	 */
	public FlatFile(File parent, String child) {
		file = new File(parent, child);
	}

	/**
	 * @param path path to the file
	 */
	public FlatFile(String path) {
		file = new File(path);
	}

	/**
	 * @param path
	 *            to set
	 */
	public void setPath(String path) {
		file = new File(path);
	}

	/**
	 * @return current String path, or null if none has been set
	 */
	public String getPath() {
		if (file == null) {
			return null;
		}
		return file.getAbsolutePath();
	}

	/**
	 * Binary search thru RandomAccessFile argument to locate the first line
	 * prefixed by key argument. As a side effect, the RandomAccessFile's
	 * position is also set to the start of the first matching line.
	 *
	 * @param fh file to search; lines must be sorted
	 * @param key prefix to search for
	 * @return long offset of the first line comparing greater than or equal
	 *         to key, or the end-of-file offset if there is no such line
	 * @throws IOException if reading or seeking fails
	 */
	public long findKeyOffset(RandomAccessFile fh, String key)
			throws IOException {
		long blockStart = findCandidateBlockOffset(fh, key);
		fh.seek(blockStart);
		if (blockStart > 0) {
			// Block boundaries usually fall mid-line: skip to a line start.
			fh.readLine();
		}
		long lineStart;
		while (true) {
			lineStart = fh.getFilePointer();
			String line = fh.readLine();
			if (line == null || line.compareTo(key) >= 0) {
				break;
			}
		}
		fh.seek(lineStart);
		return lineStart;
	}

	/**
	 * Like {@link #findKeyOffset(RandomAccessFile, String)}, but locates the
	 * last line comparing strictly less than key. As a side effect, the
	 * RandomAccessFile's position is set to the returned offset.
	 *
	 * @param fh file to search; lines must be sorted
	 * @param key key to search for
	 * @return long offset of the last scanned line that compares less than
	 *         key; when no scanned line precedes key, the offset at which the
	 *         scan began (0 for a search starting at the top of the file)
	 * @throws IOException if reading or seeking fails
	 */
	public long findKeyOffsetLT(RandomAccessFile fh, String key)
			throws IOException {
		long blockStart = findCandidateBlockOffset(fh, key);
		fh.seek(blockStart);
		if (blockStart > 0) {
			// Block boundaries usually fall mid-line: skip to a line start.
			fh.readLine();
		}
		long lastLess = blockStart;
		while (true) {
			long lineStart = fh.getFilePointer();
			String line = fh.readLine();
			if (line == null || line.compareTo(key) >= 0) {
				break;
			}
			lastLess = lineStart;
		}
		fh.seek(lastLess);
		return lastLess;
	}

	/**
	 * Narrows the search for key to a single block by binary searching over
	 * BLOCK_SIZE-aligned offsets, probing the first complete line that follows
	 * each candidate block boundary.
	 *
	 * @param fh file to search
	 * @param key key being located
	 * @return byte offset (a multiple of BLOCK_SIZE) from which a linear scan
	 *         for key should begin
	 * @throws IOException if reading or seeking fails
	 */
	private long findCandidateBlockOffset(RandomAccessFile fh, String key)
			throws IOException {
		long lowBlock = 0;
		long highBlock = fh.length() / BLOCK_SIZE;
		while (highBlock - lowBlock > 1) {
			// midBlock is always >= 1 here, so the probe never starts at the
			// top of the file and the first line read is discarded as partial.
			long midBlock = lowBlock + (highBlock - lowBlock) / 2;
			fh.seek(midBlock * BLOCK_SIZE);
			fh.readLine();
			String line = fh.readLine();
			// A null probe means the remainder of the file is one (partial)
			// line: no complete line starts in this block, so key cannot be
			// located by starting here -- search the lower half.
			if (line != null && key.compareTo(line) > 0) {
				lowBlock = midBlock;
			} else {
				highBlock = midBlock;
			}
		}
		return lowBlock * BLOCK_SIZE;
	}

	/**
	 * @return Returns the lastMatchOffset, as recorded by the most recent call
	 *         to one of the prefix-positioned iterator methods.
	 */
	public long getLastMatchOffset() {
		return lastMatchOffset;
	}

	/**
	 * @return Iterator returning one String object for each line in the file.
	 * @throws IOException if the file cannot be opened
	 */
	public CloseableIterator<String> getSequentialIterator() throws IOException {
		// Decode with the same charset as the positioned iterators rather
		// than the platform default.
		FileInputStream is = new FileInputStream(file);
		boolean handedOff = false;
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(is,
					ByteOp.UTF8));
			RecordIterator itr = new RecordIterator(br);
			handedOff = true;
			return itr;
		} finally {
			if (!handedOff) {
				closeQuietly(is);
			}
		}
	}

	/**
	 * @param prefix key to position at
	 * @return Iterator for records beginning at the first line that compares
	 *         greater than or equal to prefix
	 * @throws IOException if the file cannot be opened or searched
	 */
	public CloseableIterator<String> getRecordIterator(final String prefix)
			throws IOException {
		RandomAccessFile raf = new RandomAccessFile(file, "r");
		boolean handedOff = false;
		try {
			long offset = findKeyOffset(raf, prefix);
			lastMatchOffset = offset;
			RecordIterator itr = new RecordIterator(openReader(raf));
			handedOff = true;
			return itr;
		} finally {
			if (!handedOff) {
				closeQuietly(raf);
			}
		}
	}

	/**
	 * @param prefix key to position before
	 * @return Iterator for records beginning at the offset located by
	 *         {@link #findKeyOffsetLT(RandomAccessFile, String)}
	 * @throws IOException if the file cannot be opened or searched
	 */
	public CloseableIterator<String> getRecordIteratorLT(final String prefix)
			throws IOException {
		RandomAccessFile raf = new RandomAccessFile(file, "r");
		boolean handedOff = false;
		try {
			long offset = findKeyOffsetLT(raf, prefix);
			lastMatchOffset = offset;
			RecordIterator itr = new RecordIterator(openReader(raf));
			handedOff = true;
			return itr;
		} finally {
			if (!handedOff) {
				closeQuietly(raf);
			}
		}
	}

	/**
	 * Wraps the already-positioned file in a UTF-8 line reader sharing its
	 * file descriptor, so reading continues from the current position.
	 */
	private BufferedReader openReader(RandomAccessFile raf) throws IOException {
		FileInputStream is = new FileInputStream(raf.getFD());
		return new BufferedReader(new InputStreamReader(is, ByteOp.UTF8));
	}

	/**
	 * @param prefix key to position before
	 * @return ReverseRecordIterator positioned to return the first line BEFORE
	 *         prefix at the first call to readPrevLine().
	 * @throws IOException if the file cannot be opened or searched
	 */
	public ReverseRecordIterator getReverseRecordIterator(final String prefix)
			throws IOException {
		RandomAccessFile raf = new RandomAccessFile(file, "r");
		boolean handedOff = false;
		try {
			long offset = findKeyOffset(raf, prefix);
			if (offset < 1) {
				// Nothing precedes the match; raf is closed by the finally.
				return new ReverseRecordIterator(null);
			}
			raf.seek(raf.getFilePointer() - 1);
			lastMatchOffset = offset - 1;
			ReverseRecordIterator itr = new ReverseRecordIterator(
					new ReverseBufferedReader(raf));
			handedOff = true;
			return itr;
		} finally {
			if (!handedOff) {
				closeQuietly(raf);
			}
		}
	}

	/**
	 * Replaces the contents of the file with one line per element of itr,
	 * encoded as UTF-8 to match the charset used when reading.
	 *
	 * @param itr source of lines to write, in order
	 * @throws IOException if the file cannot be opened or a write fails
	 */
	public void store(Iterator<String> itr) throws IOException {
		PrintWriter pw = new PrintWriter(file, "UTF-8");
		try {
			while (itr.hasNext()) {
				pw.println(itr.next());
			}
			// PrintWriter never throws on write failure; it must be asked.
			if (pw.checkError()) {
				throw new IOException("Error writing to "
						+ file.getAbsolutePath());
			}
		} finally {
			pw.close();
		}
	}

	/**
	 * Considers FlatFile objects equivalent if their paths
	 * are equivalent. Two FlatFiles with no path set are also equivalent.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof FlatFile)) {
			return false;
		}
		String thisPath = getPath();
		String otherPath = ((FlatFile) obj).getPath();
		if (thisPath == null) {
			return otherPath == null;
		}
		return thisPath.equals(otherPath);
	}

	/**
	 * Hash consistent with {@link #equals(Object)}: derived from the path.
	 * Note the value changes if the path is changed via setPath().
	 */
	@Override
	public int hashCode() {
		String path = getPath();
		return (path == null) ? 0 : path.hashCode();
	}

	/** Closes c, ignoring any failure; used only on error/cleanup paths. */
	private static void closeQuietly(Closeable c) {
		if (c == null) {
			return;
		}
		try {
			c.close();
		} catch (IOException ignored) {
			// best-effort cleanup; the primary outcome is already decided
		}
	}

	private static void USAGE() {
		System.err.println("Usage: PREFIX FILE1 [FILE2] ...");
		System.exit(3);
	}

	/**
	 * Prints, to stdout, all lines starting with PREFIX from one file, or the
	 * sorted merge of such lines from several files.
	 *
	 * @param args PREFIX FILE1 [FILE2] ...
	 */
	public static void main(String[] args) {
		if (args.length < 2) {
			USAGE();
		}
		String prefix = args[0];
		CloseableIterator<String> itr = null;
		try {
			if (args.length == 2) {
				FlatFile ff = new FlatFile(args[1]);
				itr = ff.getRecordIterator(prefix);
			} else {
				Comparator<String> comp = new Comparator<String>() {
					public int compare(String o1, String o2) {
						return o1.compareTo(o2);
					}
				};
				CompositeSortedIterator<String> csi =
					new CompositeSortedIterator<String>(comp);
				// Assigned before the components are opened so the finally
				// block still closes the composite if a later file fails.
				itr = csi;
				for (int i = 1; i < args.length; i++) {
					FlatFile ff = new FlatFile(args[i]);
					RecordIterator fitr =
						(RecordIterator) ff.getRecordIterator(prefix);
					csi.addComponent(fitr);
				}
			}
			while (itr.hasNext()) {
				String line = itr.next();
				if (!line.startsWith(prefix)) {
					break;
				}
				System.out.println(line);
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(itr);
		}
	}
}