/** * Copyright 2011 Yusuke Matsubara * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.wikimedia.wikihadoop; import java.io.*; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.fs.Seekable; public class ByteMatcher { private final InputStream in; private final Seekable pos; private long lastPos; private long currentPos; private long bytes; public ByteMatcher(InputStream in, Seekable pos) throws IOException { this.in = in; this.pos = pos; this.bytes = 0; this.lastPos = -1; this.currentPos = -1; } public ByteMatcher(SeekableInputStream is) throws IOException { this(is, is); } public long getReadBytes() { return this.bytes; } public long getPos() throws IOException { return this.pos.getPos(); } public long getLastUnmatchPos() { return this.lastPos; } public void skip(long len) throws IOException { this.in.skip(len); this.bytes += len; } boolean readUntilMatch(String textPat, DataOutputBuffer outBufOrNull, long end) throws IOException { byte[] match = textPat.getBytes("UTF-8"); int i = 0; while (true) { int b = this.in.read(); // end of file: if (b == -1) { System.err.println("eof 1"); return false; } ++this.bytes; //! TODO: count up later in batch // save to buffer: if (outBufOrNull != null) outBufOrNull.write(b); // check if we're matching: if (b == match[i]) { i++; if (i >= match.length) return true; } else { i = 0; if ( this.currentPos != this.getPos() ) { this.lastPos = this.currentPos; this.currentPos = this.getPos(); } } // see if we've passed the stop point: if (i == 0 && this.pos.getPos() >= end) { System.err.println("eof 2: end=" + end); return false; } } } }