/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package com.indeed.imhotep;

import com.google.common.base.Charsets;
import com.indeed.util.core.io.Closeables2;
import com.indeed.imhotep.api.RawFTGSIterator;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;

/**
 * A {@link RawFTGSIterator} that decodes fields, terms, groups and stats from an
 * {@link InputStream}.
 *
 * <p>Layout of the stream, as consumed by this class:
 * <ul>
 *   <li>Field header: one type byte (0 = end of stream, 1 = int field, anything else = string
 *       field), a vint name length, then that many UTF-8 name bytes.</li>
 *   <li>Int term: a vlong delta that is added to the previous int term (which starts at -1 for
 *       every field). A first byte of 0 ends the field.</li>
 *   <li>String term: a vint "remove length plus one" and a vint "add length", followed by
 *       "add length" bytes that replace the removed suffix of the previous term. Both vints being
 *       0 ends the field.</li>
 *   <li>After either kind of term: a zig-zag encoded vlong holding the term doc frequency.</li>
 *   <li>Group: a vint delta added to the previous group id (which starts at -1 for every term),
 *       followed by one zig-zag encoded vlong per stat. A delta of 0 ends the term.</li>
 * </ul>
 *
 * <p>Any {@link IOException} raised while decoding puts the iterator into a failed state, in
 * which every {@code next*} method returns {@code false}, and is rethrown wrapped in a
 * {@link RuntimeException}.
 */
public class InputStreamFTGSIterator implements RawFTGSIterator {
    private static final Logger log = Logger.getLogger(InputStreamFTGSIterator.class);

    // Values of iteratorStatus. Their numeric order matters: the next* methods compare with '<'.
    private static final int STATUS_FAILED = -1;
    private static final int STATUS_END = 0;
    private static final int STATUS_READING_FIELDS = 1;
    private static final int STATUS_READING_TERMS = 2;
    private static final int STATUS_READING_GROUPS = 3;

    // Largest shift a continuation byte may lead to: 5 bytes for an int, 10 bytes for a long.
    private static final int MAX_VINT_SHIFT = 28;
    private static final int MAX_VLONG_SHIFT = 63;

    private final InputStream in;

    // read-ahead buffer over 'in'; the valid bytes are buffer[bufferPtr .. bufferLen)
    private final byte[] buffer = new byte[32768];
    private int bufferPtr = 0;
    private int bufferLen = 0;

    private int iteratorStatus = STATUS_READING_FIELDS;

    private String fieldName;
    private boolean fieldIsIntType;
    private long termDocFreq;
    private long intTermVal;
    // lazily decoded from currentTermBytes; null means "not decoded yet for the current term"
    private String stringTermVal;

    // needed to receive byte[] deltas over inputstream
    private byte[] currentTermBytes = new byte[16];
    private ByteBuffer byteBuffer = ByteBuffer.wrap(currentTermBytes);
    private int currentTermLength;

    private int groupId = -1;
    private final long[] statsBuf;

    private final CharsetDecoder decoder = Charsets.UTF_8.newDecoder();

    /**
     * @param in       stream to decode; closed by {@link #close()}
     * @param numStats number of stat values that follow every group in the stream
     */
    public InputStreamFTGSIterator(InputStream in, int numStats) {
        this.in = in;
        this.statsBuf = new long[numStats];
    }

    /** Returns the next byte of the stream, refilling the buffer when it is exhausted. */
    private byte readByte() throws IOException {
        if (bufferPtr == bufferLen) {
            refillBuffer();
        }
        return buffer[bufferPtr++];
    }

    /** Copies exactly {@code len} bytes of the stream into {@code b} starting at {@code off}. */
    private void readBytes(byte[] b, int off, int len) throws IOException {
        while (len > 0) {
            if (bufferPtr == bufferLen) {
                refillBuffer();
            }
            final int toCopy = Math.min(len, bufferLen - bufferPtr);
            System.arraycopy(buffer, bufferPtr, b, off, toCopy);
            bufferPtr += toCopy;
            off += toCopy;
            len -= toCopy;
        }
    }

    /**
     * Replaces the buffer contents with the next chunk of the stream.
     *
     * @throws IOException if the stream has ended; the buffer is left empty in that case so
     *                     that a later read fails the same way instead of returning stale bytes
     */
    private void refillBuffer() throws IOException {
        int read;
        do {
            read = in.read(buffer);
            // a zero-length read carries no data, so ask again rather than hand out old bytes
        } while (read == 0);
        if (read < 0) {
            bufferPtr = 0;
            bufferLen = 0;
            throw new IOException("Unexpected end of stream");
        }
        bufferLen = read;
        bufferPtr = 0;
    }

    /**
     * Reads a variable-length int: 7 payload bits per byte, least significant group first, with
     * the high bit of a byte set when another byte follows.
     *
     * @throws IOException if the encoding runs past 5 bytes
     */
    private int readVInt() throws IOException {
        int ret = 0;
        int shift = 0;
        while (true) {
            final byte val = readByte();
            ret += (val & 0x7F) << shift;
            if (val >= 0) {
                return ret;
            }
            shift += 7;
            if (shift > MAX_VINT_SHIFT) {
                // shifting an int by 32 or more wraps around and would silently corrupt the value
                throw new IOException("Malformed vint: encoding is longer than 5 bytes");
            }
        }
    }

    /**
     * Reads a variable-length long (same encoding as {@link #readVInt()}) whose first byte has
     * already been consumed by the caller.
     *
     * @param firstByte the already consumed first byte of the encoding
     * @throws IOException if the encoding runs past 10 bytes
     */
    private long readVLong(byte firstByte) throws IOException {
        long ret = 0;
        int shift = 0;
        byte val = firstByte;
        while (true) {
            ret += (val & 0x7FL) << shift;
            if (val >= 0) {
                return ret;
            }
            shift += 7;
            if (shift > MAX_VLONG_SHIFT) {
                // shifting a long by 64 or more wraps around and would silently corrupt the value
                throw new IOException("Malformed vlong: encoding is longer than 10 bytes");
            }
            val = readByte();
        }
    }

    /** Reads a zig-zag encoded variable-length long (lowest bit carries the sign). */
    private long readSVLong() throws IOException {
        final long ret = readVLong(readByte());
        return (ret >>> 1) ^ -(ret & 1);
    }

    /**
     * Skips whatever is left of the current field and reads the next field header.
     *
     * @return {@code true} if a field was read, {@code false} at the end of the stream or after
     *         a failure
     */
    @Override
    public boolean nextField() {
        if (iteratorStatus < STATUS_READING_FIELDS) {
            return false;
        }

        while (nextTerm()) {
            // skip until end of current field reached....
        }

        // try to read next field from input stream...
        try {
            internalNextField();
        } catch (IOException e) {
            iteratorStatus = STATUS_FAILED;
            throw new RuntimeException(e);
        }
        return iteratorStatus == STATUS_READING_TERMS;
    }

    /** Decodes one field header and resets the per-field term and group state. */
    private void internalNextField() throws IOException {
        final int fieldType = readByte() & 0xFF;
        if (fieldType == 0) {
            iteratorStatus = STATUS_END;
            return; // normal end of stream condition
        }
        fieldIsIntType = fieldType == 1;

        final int fieldNameLength = readVInt();
        if (fieldNameLength < 0) {
            // report corrupt input as an IOException instead of a NegativeArraySizeException
            throw new IOException("Malformed field header: negative name length " + fieldNameLength);
        }
        final byte[] fieldNameBytes = new byte[fieldNameLength];
        readBytes(fieldNameBytes, 0, fieldNameBytes.length);
        fieldName = new String(fieldNameBytes, Charsets.UTF_8);

        intTermVal = -1;
        stringTermVal = null;
        currentTermLength = 0;
        groupId = -1;
        iteratorStatus = STATUS_READING_TERMS;
    }

    @Override
    public final String fieldName() {
        return fieldName;
    }

    @Override
    public final boolean fieldIsIntType() {
        return fieldIsIntType;
    }

    /**
     * Skips whatever is left of the current term and reads the next term of the current field.
     *
     * @return {@code true} if a term was read, {@code false} at the end of the field, when no
     *         field is being read, or after a failure
     */
    @Override
    public final boolean nextTerm() {
        if (iteratorStatus < STATUS_READING_TERMS) {
            return false;
        }

        while (nextGroup()) {
            // skip until end of current term
        }

        try {
            internalNextTerm();
        } catch (IOException e) {
            iteratorStatus = STATUS_FAILED;
            throw new RuntimeException(e);
        }
        return iteratorStatus == STATUS_READING_GROUPS;
    }

    /** Decodes one term (or the end-of-field marker) followed by its doc frequency. */
    private void internalNextTerm() throws IOException {
        if (fieldIsIntType) {
            final byte firstByte = readByte();
            if (firstByte == 0) {
                iteratorStatus = STATUS_READING_FIELDS;
                return;
            }
            intTermVal += readVLong(firstByte);
        } else {
            final int removeLengthPlusOne = readVInt();
            final int addLength = readVInt();
            if (removeLengthPlusOne == 0 && addLength == 0) {
                iteratorStatus = STATUS_READING_FIELDS;
                return;
            }
            applyStringTermDelta(removeLengthPlusOne - 1, addLength);
        }
        termDocFreq = readSVLong();

        groupId = -1;
        iteratorStatus = STATUS_READING_GROUPS;
    }

    /**
     * Builds the current string term from the previous one: drops its last {@code removeLength}
     * bytes and appends {@code addLength} bytes read from the stream.
     *
     * @throws IOException if the lengths cannot describe a change to the previous term
     */
    private void applyStringTermDelta(int removeLength, int addLength) throws IOException {
        if (removeLength < 0 || removeLength > currentTermLength || addLength < 0) {
            throw new IOException("Malformed string term: removeLength=" + removeLength
                    + ", addLength=" + addLength + ", previous term length=" + currentTermLength);
        }
        final int keptLength = currentTermLength - removeLength;
        final int newLength = keptLength + addLength;
        if (newLength < 0) {
            // int overflow: the term would not fit into a byte array
            throw new IOException("Malformed string term: length overflow, kept=" + keptLength
                    + ", addLength=" + addLength);
        }

        if (currentTermBytes.length < newLength) {
            final byte[] temp = new byte[Math.max(currentTermBytes.length * 2, newLength)];
            System.arraycopy(currentTermBytes, 0, temp, 0, currentTermLength);
            currentTermBytes = temp;
            // keep the decoding view pointed at the array that now holds the term
            byteBuffer = ByteBuffer.wrap(currentTermBytes);
        }
        readBytes(currentTermBytes, keptLength, addLength);
        currentTermLength = newLength;
        stringTermVal = null;
    }

    @Override
    public final long termDocFreq() {
        return termDocFreq;
    }

    @Override
    public final long termIntVal() {
        return intTermVal;
    }

    /**
     * Returns the current string term, decoding it from its UTF-8 bytes on the first call for
     * each term and caching the result.
     *
     * @throws RuntimeException wrapping a {@link CharacterCodingException} if decoding fails
     */
    @Override
    public final String termStringVal() {
        if (stringTermVal == null) {
            try {
                byteBuffer.position(0);
                byteBuffer.limit(currentTermLength);
                stringTermVal = decoder.decode(byteBuffer).toString();
            } catch (CharacterCodingException e) {
                throw new RuntimeException(e);
            }
        }
        return stringTermVal;
    }

    /**
     * Returns the internal array holding the bytes of the current string term. It is not a copy,
     * it is overwritten by later terms, and only its first {@link #termStringLength()} bytes
     * belong to the current term.
     */
    @Override
    public final byte[] termStringBytes() {
        return currentTermBytes;
    }

    @Override
    public final int termStringLength() {
        return currentTermLength;
    }

    /**
     * Reads the next group of the current term together with its stats.
     *
     * @return {@code true} if a group was read, {@code false} at the end of the term, when no
     *         term is being read, or after a failure
     */
    @Override
    public final boolean nextGroup() {
        if (iteratorStatus < STATUS_READING_GROUPS) {
            return false;
        }

        try {
            final int grpDelta = readVInt();
            if (grpDelta == 0) {
                iteratorStatus = STATUS_READING_TERMS;
            } else {
                groupId += grpDelta;
                for (int i = 0; i < statsBuf.length; i++) {
                    statsBuf[i] = readSVLong();
                }
            }
        } catch (IOException e) {
            iteratorStatus = STATUS_FAILED;
            throw new RuntimeException(e);
        }
        return iteratorStatus == STATUS_READING_GROUPS;
    }

    @Override
    public final int group() {
        return groupId;
    }

    /**
     * Copies the stats of the current group into {@code stats}, starting at index 0.
     *
     * @param stats destination; must have room for the number of stats given to the constructor
     */
    @Override
    public final void groupStats(long[] stats) {
        System.arraycopy(statsBuf, 0, stats, 0, statsBuf.length);
    }

    /** Closes the underlying stream; a failure to close is passed to the logger, not thrown. */
    @Override
    public void close() {
        Closeables2.closeQuietly(in, log);
    }
}