/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util.fst; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; // TODO: merge with PagedBytes, except PagedBytes doesn't // let you read while writing which FST needs class BytesStore extends DataOutput implements Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesStore.class) + RamUsageEstimator.shallowSizeOfInstance(ArrayList.class); private final List<byte[]> blocks = new ArrayList<>(); private final int blockSize; private final int blockBits; private final int blockMask; private byte[] current; private int nextWrite; public BytesStore(int blockBits) { this.blockBits = blockBits; blockSize = 1 << blockBits; blockMask = blockSize-1; nextWrite = blockSize; } /** Pulls bytes from the provided IndexInput. */ public BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException { int blockSize = 2; int blockBits = 1; while(blockSize < numBytes && blockSize < maxBlockSize) { blockSize *= 2; blockBits++; } this.blockBits = blockBits; this.blockSize = blockSize; this.blockMask = blockSize-1; long left = numBytes; while(left > 0) { final int chunk = (int) Math.min(blockSize, left); byte[] block = new byte[chunk]; in.readBytes(block, 0, block.length); blocks.add(block); left -= chunk; } // So .getPosition still works nextWrite = blocks.get(blocks.size()-1).length; } /** Absolute write byte; you must ensure dest is < max * position written so far. */ public void writeByte(int dest, byte b) { int blockIndex = dest >> blockBits; byte[] block = blocks.get(blockIndex); block[dest & blockMask] = b; } @Override public void writeByte(byte b) { if (nextWrite == blockSize) { current = new byte[blockSize]; blocks.add(current); nextWrite = 0; } current[nextWrite++] = b; } @Override public void writeBytes(byte[] b, int offset, int len) { while (len > 0) { int chunk = blockSize - nextWrite; if (len <= chunk) { assert b != null; assert current != null; System.arraycopy(b, offset, current, nextWrite, len); nextWrite += len; break; } else { if (chunk > 0) { System.arraycopy(b, offset, current, nextWrite, chunk); offset += chunk; len -= chunk; } current = new byte[blockSize]; blocks.add(current); nextWrite = 0; } } } int getBlockBits() { return blockBits; } /** Absolute writeBytes without changing the current * position. Note: this cannot "grow" the bytes, so you * must only call it on already written parts. */ void writeBytes(long dest, byte[] b, int offset, int len) { //System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len); assert dest + len <= getPosition(): "dest=" + dest + " pos=" + getPosition() + " len=" + len; // Note: weird: must go "backwards" because copyBytes // calls us with overlapping src/dest. If we // go forwards then we overwrite bytes before we can // copy them: /* int blockIndex = dest >> blockBits; int upto = dest & blockMask; byte[] block = blocks.get(blockIndex); while (len > 0) { int chunk = blockSize - upto; System.out.println(" cycle chunk=" + chunk + " len=" + len); if (len <= chunk) { System.arraycopy(b, offset, block, upto, len); break; } else { System.arraycopy(b, offset, block, upto, chunk); offset += chunk; len -= chunk; blockIndex++; block = blocks.get(blockIndex); upto = 0; } } */ final long end = dest + len; int blockIndex = (int) (end >> blockBits); int downTo = (int) (end & blockMask); if (downTo == 0) { blockIndex--; downTo = blockSize; } byte[] block = blocks.get(blockIndex); while (len > 0) { //System.out.println(" cycle downTo=" + downTo + " len=" + len); if (len <= downTo) { //System.out.println(" final: offset=" + offset + " len=" + len + " dest=" + (downTo-len)); System.arraycopy(b, offset, block, downTo-len, len); break; } else { len -= downTo; //System.out.println(" partial: offset=" + (offset + len) + " len=" + downTo + " dest=0"); System.arraycopy(b, offset + len, block, 0, downTo); blockIndex--; block = blocks.get(blockIndex); downTo = blockSize; } } } /** Absolute copy bytes self to self, without changing the * position. Note: this cannot "grow" the bytes, so must * only call it on already written parts. */ public void copyBytes(long src, long dest, int len) { //System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len); assert src < dest; // Note: weird: must go "backwards" because copyBytes // calls us with overlapping src/dest. If we // go forwards then we overwrite bytes before we can // copy them: /* int blockIndex = src >> blockBits; int upto = src & blockMask; byte[] block = blocks.get(blockIndex); while (len > 0) { int chunk = blockSize - upto; System.out.println(" cycle: chunk=" + chunk + " len=" + len); if (len <= chunk) { writeBytes(dest, block, upto, len); break; } else { writeBytes(dest, block, upto, chunk); blockIndex++; block = blocks.get(blockIndex); upto = 0; len -= chunk; dest += chunk; } } */ long end = src + len; int blockIndex = (int) (end >> blockBits); int downTo = (int) (end & blockMask); if (downTo == 0) { blockIndex--; downTo = blockSize; } byte[] block = blocks.get(blockIndex); while (len > 0) { //System.out.println(" cycle downTo=" + downTo); if (len <= downTo) { //System.out.println(" finish"); writeBytes(dest, block, downTo-len, len); break; } else { //System.out.println(" partial"); len -= downTo; writeBytes(dest + len, block, 0, downTo); blockIndex--; block = blocks.get(blockIndex); downTo = blockSize; } } } /** Writes an int at the absolute position without * changing the current pointer. */ public void writeInt(long pos, int value) { int blockIndex = (int) (pos >> blockBits); int upto = (int) (pos & blockMask); byte[] block = blocks.get(blockIndex); int shift = 24; for(int i=0;i<4;i++) { block[upto++] = (byte) (value >> shift); shift -= 8; if (upto == blockSize) { upto = 0; blockIndex++; block = blocks.get(blockIndex); } } } /** Reverse from srcPos, inclusive, to destPos, inclusive. */ public void reverse(long srcPos, long destPos) { assert srcPos < destPos; assert destPos < getPosition(); //System.out.println("reverse src=" + srcPos + " dest=" + destPos); int srcBlockIndex = (int) (srcPos >> blockBits); int src = (int) (srcPos & blockMask); byte[] srcBlock = blocks.get(srcBlockIndex); int destBlockIndex = (int) (destPos >> blockBits); int dest = (int) (destPos & blockMask); byte[] destBlock = blocks.get(destBlockIndex); //System.out.println(" srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex); int limit = (int) (destPos - srcPos + 1)/2; for(int i=0;i<limit;i++) { //System.out.println(" cycle src=" + src + " dest=" + dest); byte b = srcBlock[src]; srcBlock[src] = destBlock[dest]; destBlock[dest] = b; src++; if (src == blockSize) { srcBlockIndex++; srcBlock = blocks.get(srcBlockIndex); //System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock); src = 0; } dest--; if (dest == -1) { destBlockIndex--; destBlock = blocks.get(destBlockIndex); //System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock); dest = blockSize-1; } } } public void skipBytes(int len) { while (len > 0) { int chunk = blockSize - nextWrite; if (len <= chunk) { nextWrite += len; break; } else { len -= chunk; current = new byte[blockSize]; blocks.add(current); nextWrite = 0; } } } public long getPosition() { return ((long) blocks.size()-1) * blockSize + nextWrite; } /** Pos must be less than the max position written so far! * Ie, you cannot "grow" the file with this! */ public void truncate(long newLen) { assert newLen <= getPosition(); assert newLen >= 0; int blockIndex = (int) (newLen >> blockBits); nextWrite = (int) (newLen & blockMask); if (nextWrite == 0) { blockIndex--; nextWrite = blockSize; } blocks.subList(blockIndex+1, blocks.size()).clear(); if (newLen == 0) { current = null; } else { current = blocks.get(blockIndex); } assert newLen == getPosition(); } public void finish() { if (current != null) { byte[] lastBuffer = new byte[nextWrite]; System.arraycopy(current, 0, lastBuffer, 0, nextWrite); blocks.set(blocks.size()-1, lastBuffer); current = null; } } /** Writes all of our bytes to the target {@link DataOutput}. */ public void writeTo(DataOutput out) throws IOException { for(byte[] block : blocks) { out.writeBytes(block, 0, block.length); } } public FST.BytesReader getForwardReader() { if (blocks.size() == 1) { return new ForwardBytesReader(blocks.get(0)); } return new FST.BytesReader() { private byte[] current; private int nextBuffer; private int nextRead = blockSize; @Override public byte readByte() { if (nextRead == blockSize) { current = blocks.get(nextBuffer++); nextRead = 0; } return current[nextRead++]; } @Override public void skipBytes(long count) { setPosition(getPosition() + count); } @Override public void readBytes(byte[] b, int offset, int len) { while(len > 0) { int chunkLeft = blockSize - nextRead; if (len <= chunkLeft) { System.arraycopy(current, nextRead, b, offset, len); nextRead += len; break; } else { if (chunkLeft > 0) { System.arraycopy(current, nextRead, b, offset, chunkLeft); offset += chunkLeft; len -= chunkLeft; } current = blocks.get(nextBuffer++); nextRead = 0; } } } @Override public long getPosition() { return ((long) nextBuffer-1)*blockSize + nextRead; } @Override public void setPosition(long pos) { int bufferIndex = (int) (pos >> blockBits); nextBuffer = bufferIndex+1; current = blocks.get(bufferIndex); nextRead = (int) (pos & blockMask); assert getPosition() == pos; } @Override public boolean reversed() { return false; } }; } public FST.BytesReader getReverseReader() { return getReverseReader(true); } FST.BytesReader getReverseReader(boolean allowSingle) { if (allowSingle && blocks.size() == 1) { return new ReverseBytesReader(blocks.get(0)); } return new FST.BytesReader() { private byte[] current = blocks.size() == 0 ? null : blocks.get(0); private int nextBuffer = -1; private int nextRead = 0; @Override public byte readByte() { if (nextRead == -1) { current = blocks.get(nextBuffer--); nextRead = blockSize-1; } return current[nextRead--]; } @Override public void skipBytes(long count) { setPosition(getPosition() - count); } @Override public void readBytes(byte[] b, int offset, int len) { for(int i=0;i<len;i++) { b[offset+i] = readByte(); } } @Override public long getPosition() { return ((long) nextBuffer+1)*blockSize + nextRead; } @Override public void setPosition(long pos) { // NOTE: a little weird because if you // setPosition(0), the next byte you read is // bytes[0] ... but I would expect bytes[-1] (ie, // EOF)...? int bufferIndex = (int) (pos >> blockBits); nextBuffer = bufferIndex-1; current = blocks.get(bufferIndex); nextRead = (int) (pos & blockMask); assert getPosition() == pos: "pos=" + pos + " getPos()=" + getPosition(); } @Override public boolean reversed() { return true; } }; } @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES_USED; for (byte[] block : blocks) { size += RamUsageEstimator.sizeOf(block); } return size; } @Override public String toString() { return getClass().getSimpleName() + "(numBlocks=" + blocks.size() + ")"; } }