package org.apache.lucene.util.fst;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.blur.lucene.fst.ByteArray;
import org.apache.blur.lucene.fst.ByteArrayFactory;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs

/**
 * Append-only byte store backed by a list of {@link ByteArray} blocks of
 * {@code 1 << blockBits} bytes each. Besides the sequential {@link DataOutput}
 * writes, it supports absolute writes/copies/reversal inside the region that
 * has already been written, truncation, and forward and reverse readers over
 * the stored blocks.
 */
class BytesStore extends DataOutput {

  /** All blocks allocated so far, in position order. */
  private final List<ByteArray> blocks = new ArrayList<ByteArray>();

  /** Factory used for every block (and temporary buffer) allocation. */
  private final static ByteArrayFactory factory = ByteArrayFactory.getDefaultFactory();

  private final int blockSize;
  private final int blockBits;
  private final int blockMask;

  /** Block currently being appended to; null before the first write and after {@link #finish()}. */
  private ByteArray current;

  /** Offset of the next sequential write inside {@link #current}. */
  private int nextWrite;

  /**
   * Creates an empty store whose blocks hold {@code 1 << blockBits} bytes.
   *
   * @param blockBits log2 of the block size
   */
  public BytesStore(int blockBits) {
    this.blockBits = blockBits;
    blockSize = 1 << blockBits;
    blockMask = blockSize - 1;
    // "Full" sentinel: the first sequential write allocates the first block,
    // and getPosition() evaluates to 0 while there are no blocks.
    nextWrite = blockSize;
  }

  /**
   * Pulls {@code numBytes} bytes from the provided {@link DataInput}.
   *
   * <p>The block size is the smallest power of two (at least 2) that is
   * {@code >= numBytes} or {@code >= maxBlockSize}, capped at 2^30. The last
   * block is allocated with exactly the number of remaining bytes.
   *
   * @param in source of the bytes
   * @param numBytes number of bytes to read; 0 yields an empty store
   * @param maxBlockSize upper bound that stops the block size from growing
   * @throws IOException if reading from {@code in} fails
   */
  public BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException {
    int blockSize = 2;
    int blockBits = 1;
    // Cap at 2^30: doubling beyond that overflows int (negative, then 0) and
    // the loop condition would stay true forever.
    while (blockSize < numBytes && blockSize < maxBlockSize && blockBits < 30) {
      blockSize *= 2;
      blockBits++;
    }
    this.blockBits = blockBits;
    this.blockSize = blockSize;
    this.blockMask = blockSize - 1;
    long left = numBytes;
    while (left > 0) {
      final int chunk = (int) Math.min(blockSize, left);
      ByteArray block = factory.newByteArray(chunk);
      block.readBytes(in, 0, block.length());
      blocks.add(block);
      left -= chunk;
    }
    // So .getPosition still works. With no blocks at all (numBytes == 0) use
    // the same "full" sentinel as the empty constructor so the position is 0
    // instead of failing on blocks.get(-1).
    nextWrite = blocks.isEmpty() ? blockSize : blocks.get(blocks.size() - 1).length();
  }

  /**
   * Absolute write byte; you must ensure dest is &lt; max position written so far.
   *
   * @param dest absolute position to overwrite
   * @param b byte to store
   */
  public void writeByte(int dest, byte b) {
    int blockIndex = dest >> blockBits;
    ByteArray block = blocks.get(blockIndex);
    block.put(dest & blockMask, b);
  }

  /** Appends one byte at the current position, allocating a new block when the current one is full. */
  @Override
  public void writeByte(byte b) {
    if (nextWrite == blockSize) {
      current = factory.newByteArray(blockSize);
      blocks.add(current);
      nextWrite = 0;
    }
    current.put(nextWrite++, b);
  }

  /**
   * Appends {@code len} bytes from {@code b} starting at {@code offset},
   * spilling into newly allocated blocks as each block fills up.
   */
  @Override
  public void writeBytes(byte[] b, int offset, int len) {
    while (len > 0) {
      int chunk = blockSize - nextWrite;
      if (len <= chunk) {
        arraycopy(b, offset, current, nextWrite, len);
        nextWrite += len;
        break;
      } else {
        // chunk == 0 also covers the "no block yet" state (current == null).
        if (chunk > 0) {
          arraycopy(b, offset, current, nextWrite, chunk);
          offset += chunk;
          len -= chunk;
        }
        current = factory.newByteArray(blockSize);
        blocks.add(current);
        nextWrite = 0;
      }
    }
  }

  /** Returns log2 of the block size. */
  int getBlockBits() {
    return blockBits;
  }

  /**
   * Absolute writeBytes from a {@code byte[]} without changing the current
   * position. The bytes are first staged in a temporary {@link ByteArray} and
   * then written via {@link #writeBytes(long, ByteArray, int, int)}.
   */
  void writeBytes(long dest, byte[] b, int offset, int len) {
    // NOTE(review): the temporary ByteArray is never explicitly released; if
    // the factory hands out non-heap buffers this may need cleanup -- confirm.
    ByteArray byteArray = factory.newByteArray(len);
    byteArray.put(0, b, offset, len);
    writeBytes(dest, byteArray, 0, len);
  }

  /**
   * Absolute writeBytes without changing the current position. Note: this
   * cannot "grow" the bytes, so you must only call it on already written parts.
   *
   * @param dest absolute position of the first byte to overwrite
   * @param b source bytes
   * @param offset first byte of {@code b} to copy
   * @param len number of bytes to copy
   */
  void writeBytes(long dest, ByteArray b, int offset, int len) {
    assert dest + len <= getPosition() : "dest=" + dest + " pos=" + getPosition() + " len=" + len;

    // Note: weird: must go "backwards" because copyBytes
    // calls us with overlapping src/dest. If we
    // go forwards then we overwrite bytes before we can
    // copy them:
    final long end = dest + len;
    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      // end sits exactly on a block boundary: it belongs to the previous block.
      blockIndex--;
      downTo = blockSize;
    }
    ByteArray block = blocks.get(blockIndex);

    while (len > 0) {
      if (len <= downTo) {
        // Remaining bytes fit entirely in this block.
        arraycopy(b, offset, block, downTo - len, len);
        break;
      } else {
        // Fill this block from its start up to downTo with the tail of the
        // remaining source bytes, then step to the previous block.
        len -= downTo;
        arraycopy(b, offset + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /**
   * Absolute copy bytes self to self, without changing the position. Note: this
   * cannot "grow" the bytes, so must only call it on already written parts.
   *
   * @param src absolute position of the first source byte; must be &lt; dest
   * @param dest absolute position of the first destination byte
   * @param len number of bytes to copy
   */
  public void copyBytes(long src, long dest, int len) {
    assert src < dest;

    // Note: weird: must go "backwards" because the src/dest ranges may
    // overlap. If we go forwards then we overwrite bytes before we can
    // copy them.
    // NOTE(review): when source and destination land in the same block this
    // relies on ByteArray.copy tolerating overlapping ranges -- confirm.
    long end = src + len;

    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      blockIndex--;
      downTo = blockSize;
    }
    ByteArray block = blocks.get(blockIndex);

    while (len > 0) {
      if (len <= downTo) {
        writeBytes(dest, block, downTo - len, len);
        break;
      } else {
        len -= downTo;
        writeBytes(dest + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /**
   * Writes an int (most significant byte first) at the absolute position
   * without changing the current pointer.
   *
   * @param pos absolute position of the first of the four bytes
   * @param value int to store
   */
  public void writeInt(long pos, int value) {
    int blockIndex = (int) (pos >> blockBits);
    int upto = (int) (pos & blockMask);
    ByteArray block = blocks.get(blockIndex);
    int shift = 24;
    for (int i = 0; i < 4; i++) {
      block.put(upto++, (byte) (value >> shift));
      shift -= 8;
      // Only move to the next block while bytes remain; otherwise an int that
      // ends exactly at the end of the last block would look up a block that
      // does not exist.
      if (upto == blockSize && i < 3) {
        upto = 0;
        blockIndex++;
        block = blocks.get(blockIndex);
      }
    }
  }

  /**
   * Reverse from srcPos, inclusive, to destPos, inclusive.
   *
   * @param srcPos absolute position of the first byte of the range
   * @param destPos absolute position of the last byte of the range
   */
  public void reverse(long srcPos, long destPos) {
    assert srcPos < destPos;
    assert destPos < getPosition();

    int srcBlockIndex = (int) (srcPos >> blockBits);
    int src = (int) (srcPos & blockMask);
    ByteArray srcBlock = blocks.get(srcBlockIndex);

    int destBlockIndex = (int) (destPos >> blockBits);
    int dest = (int) (destPos & blockMask);
    ByteArray destBlock = blocks.get(destBlockIndex);

    // Number of swaps: half the range length (the middle byte of an
    // odd-length range stays in place).
    int limit = (int) (destPos - srcPos + 1) / 2;
    for (int i = 0; i < limit; i++) {
      byte b = srcBlock.get(src);
      srcBlock.put(src, destBlock.get(dest));
      destBlock.put(dest, b);
      src++;
      if (src == blockSize) {
        srcBlockIndex++;
        srcBlock = blocks.get(srcBlockIndex);
        src = 0;
      }

      dest--;
      if (dest == -1) {
        destBlockIndex--;
        destBlock = blocks.get(destBlockIndex);
        dest = blockSize - 1;
      }
    }
  }

  /**
   * Advances the current write position by {@code len} bytes without writing
   * anything, allocating new blocks as needed.
   */
  public void skipBytes(int len) {
    while (len > 0) {
      int chunk = blockSize - nextWrite;
      if (len <= chunk) {
        nextWrite += len;
        break;
      } else {
        len -= chunk;
        current = factory.newByteArray(blockSize);
        blocks.add(current);
        nextWrite = 0;
      }
    }
  }

  /** Returns the current absolute write position. */
  public long getPosition() {
    return ((long) blocks.size() - 1) * blockSize + nextWrite;
  }

  /**
   * Pos must be less than the max position written so far! Ie, you cannot
   * "grow" the file with this!
   *
   * @param newLen new length; blocks entirely beyond it are dropped
   */
  public void truncate(long newLen) {
    assert newLen <= getPosition();
    assert newLen >= 0;
    int blockIndex = (int) (newLen >> blockBits);
    nextWrite = (int) (newLen & blockMask);
    if (nextWrite == 0) {
      // Exactly on a block boundary: treat the previous block as full.
      blockIndex--;
      nextWrite = blockSize;
    }
    blocks.subList(blockIndex + 1, blocks.size()).clear();
    if (newLen == 0) {
      current = null;
    } else {
      current = blocks.get(blockIndex);
    }
    assert newLen == getPosition();
  }

  /**
   * Replaces the last block with a copy holding exactly the bytes written to
   * it and clears the current block reference. Does nothing when there is no
   * current block.
   */
  public void finish() {
    if (current != null) {
      ByteArray lastBuffer = factory.newByteArray(nextWrite);
      arraycopy(current, 0, lastBuffer, 0, nextWrite);
      blocks.set(blocks.size() - 1, lastBuffer);
      current = null;
    }
  }

  /** Writes all of our bytes to the target {@link DataOutput}. */
  public void writeTo(DataOutput out) throws IOException {
    for (ByteArray block : blocks) {
      block.writeBytes(out, 0, block.length());
    }
  }

  /**
   * Returns a reader that walks the stored bytes in increasing position
   * order. A store with exactly one block gets a reader over that block
   * directly.
   */
  public FST.BytesReader getForwardReader() {
    if (blocks.size() == 1) {
      return new ForwardBytesReaderByteArray(blocks.get(0));
    }
    return new FST.BytesReader() {
      private ByteArray current;
      private int nextBuffer;
      // Starts "exhausted" so the first read loads block 0.
      private int nextRead = blockSize;

      @Override
      public byte readByte() {
        if (nextRead == blockSize) {
          current = blocks.get(nextBuffer++);
          nextRead = 0;
        }
        return current.get(nextRead++);
      }

      @Override
      public void skipBytes(int count) {
        setPosition(getPosition() + count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        while (len > 0) {
          int chunkLeft = blockSize - nextRead;
          if (len <= chunkLeft) {
            arraycopy(current, nextRead, b, offset, len);
            nextRead += len;
            break;
          } else {
            if (chunkLeft > 0) {
              arraycopy(current, nextRead, b, offset, chunkLeft);
              offset += chunkLeft;
              len -= chunkLeft;
            }
            current = blocks.get(nextBuffer++);
            nextRead = 0;
          }
        }
      }

      @Override
      public long getPosition() {
        return ((long) nextBuffer - 1) * blockSize + nextRead;
      }

      @Override
      public void setPosition(long pos) {
        int bufferIndex = (int) (pos >> blockBits);
        nextBuffer = bufferIndex + 1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos;
      }

      @Override
      public boolean reversed() {
        return false;
      }
    };
  }

  /** Returns a reverse reader, using the single-block reader when the store has exactly one block. */
  public FST.BytesReader getReverseReader() {
    return getReverseReader(true);
  }

  /**
   * Returns a reader that walks the stored bytes in decreasing position order.
   *
   * @param allowSingle when true and the store has exactly one block, a
   *        reader over that block is returned directly
   */
  FST.BytesReader getReverseReader(boolean allowSingle) {
    if (allowSingle && blocks.size() == 1) {
      return new ReverseBytesReaderByteArray(blocks.get(0));
    }
    return new FST.BytesReader() {
      private ByteArray current = blocks.size() == 0 ? null : blocks.get(0);
      private int nextBuffer = -1;
      private int nextRead = 0;

      @Override
      public byte readByte() {
        if (nextRead == -1) {
          current = blocks.get(nextBuffer--);
          nextRead = blockSize - 1;
        }
        return current.get(nextRead--);
      }

      @Override
      public void skipBytes(int count) {
        setPosition(getPosition() - count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        for (int i = 0; i < len; i++) {
          b[offset + i] = readByte();
        }
      }

      @Override
      public long getPosition() {
        return ((long) nextBuffer + 1) * blockSize + nextRead;
      }

      @Override
      public void setPosition(long pos) {
        // NOTE: a little weird because if you
        // setPosition(0), the next byte you read is
        // bytes[0] ... but I would expect bytes[-1] (ie,
        // EOF)...?
        int bufferIndex = (int) (pos >> blockBits);
        nextBuffer = bufferIndex - 1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos : "pos=" + pos + " getPos()=" + getPosition();
      }

      @Override
      public boolean reversed() {
        return true;
      }
    };
  }

  /** Forward reader over a single {@link ByteArray}; the position is a plain index into it. */
  static class ForwardBytesReaderByteArray extends FST.BytesReader {
    private final ByteArray bytes;
    private int pos;

    public ForwardBytesReaderByteArray(ByteArray bytes) {
      this.bytes = bytes;
    }

    @Override
    public byte readByte() {
      return bytes.get(pos++);
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      arraycopy(bytes, pos, b, offset, len);
      pos += len;
    }

    @Override
    public void skipBytes(int count) {
      pos += count;
    }

    @Override
    public long getPosition() {
      return pos;
    }

    @Override
    public void setPosition(long pos) {
      this.pos = (int) pos;
    }

    @Override
    public boolean reversed() {
      return false;
    }
  }

  /** Reverse reader over a single {@link ByteArray}; each read returns the byte at the position and then decrements it. */
  static class ReverseBytesReaderByteArray extends FST.BytesReader {
    private final ByteArray bytes;
    private int pos;

    public ReverseBytesReaderByteArray(ByteArray bytes) {
      this.bytes = bytes;
    }

    @Override
    public byte readByte() {
      return bytes.get(pos--);
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      for (int i = 0; i < len; i++) {
        b[offset + i] = bytes.get(pos--);
      }
    }

    @Override
    public void skipBytes(int count) {
      pos -= count;
    }

    @Override
    public long getPosition() {
      return pos;
    }

    @Override
    public void setPosition(long pos) {
      this.pos = (int) pos;
    }

    @Override
    public boolean reversed() {
      return true;
    }
  }

  /** Copies {@code len} bytes from a {@link ByteArray} into a {@code byte[]}. */
  public static void arraycopy(ByteArray src, int srcOffset, byte[] dest, int destOffset, int len) {
    src.get(srcOffset, dest, destOffset, len);
  }

  /** Copies {@code len} bytes from one {@link ByteArray} into another. */
  public static void arraycopy(ByteArray src, int srcOffset, ByteArray dest, int destOffset, int len) {
    src.copy(srcOffset, dest, destOffset, len);
  }

  /** Copies {@code len} bytes from a {@code byte[]} into a {@link ByteArray}. */
  public static void arraycopy(byte[] src, int srcOffset, ByteArray dest, int destOffset, int len) {
    dest.put(destOffset, src, srcOffset, len);
  }
}