final TermsHashPerThread perThread;
final DocumentsWriter.DocState docState;
final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
BytesRef termBytesRef;

// Copied from our perThread
final IntBlockPool intPool;
final ByteBlockPool bytePool;
final ByteBlockPool termBytePool;

// Number of byte streams the consumer writes per term, and twice that
// number (used as the headroom required before a new term is started).
final int streamCount;
final int numPostingInt;

final FieldInfo fieldInfo;

final BytesRefHash bytesHash;

// Created and released by PostingsBytesStartArray; null while the hash is cleared.
ParallelPostingsArray postingsArray;
private final AtomicLong bytesUsed;

/**
 * Creates the per-field terms hash and, when a next per-thread hash is
 * supplied, the chained per-field hash that receives every term by its
 * text start.
 *
 * @param docInverterPerField source of the {@link FieldInvertState} for this field
 * @param perThread owner supplying the int/byte pools, the doc state and the consumer
 * @param nextPerThread next hash in the chain, or {@code null} if this is the last one
 * @param fieldInfo the field this instance indexes
 */
public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
  this.perThread = perThread;
  intPool = perThread.intPool;
  bytePool = perThread.bytePool;
  termBytePool = perThread.termBytePool;
  docState = perThread.docState;
  // Account against the writer-wide counter only when allocation tracking
  // is enabled; otherwise count into a private, throw-away counter.
  bytesUsed = perThread.termsHash.trackAllocations
      ? perThread.termsHash.docWriter.bytesUsed
      : new AtomicLong();

  fieldState = docInverterPerField.fieldState;
  this.consumer = perThread.consumer.addField(this, fieldInfo);
  PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
  bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
  streamCount = consumer.getStreamCount();
  numPostingInt = 2 * streamCount;
  this.fieldInfo = fieldInfo;
  if (nextPerThread != null) {
    nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
  } else {
    nextPerField = null;
  }
}

/**
 * Releases the hash after a flush.
 *
 * @param targetSize not used by this implementation
 */
void shrinkHash(int targetSize) {
  // Fully free the bytesHash on each flush but keep the pool untouched
  // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
  bytesHash.clear(false);
}

/** Clears this hash (leaving the pools alone) and then the chained hash, if any. */
public void reset() {
  bytesHash.clear(false);
  if (nextPerField != null) {
    nextPerField.reset();
  }
}

/** Resets this hash and then aborts the chained hash, if any. */
@Override
public synchronized void abort() {
  reset();
  if (nextPerField != null) {
    nextPerField.abort();
  }
}

/**
 * Initializes {@code reader} over the bytes written so far to one stream of a term.
 *
 * @param reader the reader to initialize
 * @param termID id of the term whose stream is read
 * @param stream stream index; must be less than {@link #streamCount}
 */
public void initReader(ByteSliceReader reader, int termID, int stream) {
  assert stream < streamCount;
  int intStart = postingsArray.intStarts[termID];
  final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
  final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK;
  // The first slices of a term's streams are laid out back to back, so
  // stream N starts N first-level slices after the term's byte start. The
  // last argument is the address stored in the int pool for that stream,
  // i.e. the position writeByte advances.
  reader.init(bytePool,
              postingsArray.byteStarts[termID] + stream * ByteBlockPool.FIRST_LEVEL_SIZE,
              ints[upto + stream]);
}

/** Collapse the hash table & sort in-place. */
public int[] sortPostings(Comparator<BytesRef> termComp) {
  return bytesHash.sort(termComp);
}

// Results of the last start(Fieldable[], int) call on the consumer and on
// the chained hash respectively.
private boolean doCall;
private boolean doNextCall;

/**
 * Fetches the term attribute and its reusable {@link BytesRef} for the
 * field instance about to be inverted, then forwards the call to the
 * consumer and to the chained hash, if any.
 */
@Override
void start(Fieldable f) {
  termAtt = fieldState.attributeSource.getAttribute(TermToBytesRefAttribute.class);
  termBytesRef = termAtt.getBytesRef();
  consumer.start(f);
  if (nextPerField != null) {
    nextPerField.start(f);
  }
}

/**
 * Starts the consumer and the chained hash for a group of same-named
 * fields and re-initializes the bytes hash.
 *
 * @return {@code true} if the consumer or the chained hash returned {@code true}
 */
@Override
boolean start(Fieldable[] fields, int count) throws IOException {
  doCall = consumer.start(fields, count);
  bytesHash.reinit();
  if (nextPerField != null) {
    doNextCall = nextPerField.start(fields, count);
  }
  return doCall || doNextCall;
}

// Secondary entry point (for 2nd & subsequent TermsHash),
// because token text has already been "interned" into
// textStart, so we hash by textStart
public void add(int textStart) throws IOException {
  recordPosting(bytesHash.addByPoolOffset(textStart));
}

// Primary entry point (for first TermsHash)
@Override
void add() throws IOException {

  // We are first in the chain so we must "intern" the
  // term text into textStart address

  // Get the text & hash of this term.
  int termID;
  try {
    termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
  } catch (MaxBytesLengthExceededException e) {
    // The hash rejected the term with MaxBytesLengthExceededException.
    // Just skip this term, to remain as robust as
    // possible during indexing.  A TokenFilter
    // can be inserted into the analyzer chain if
    // other behavior is wanted (pruning the term
    // to a prefix, throwing an exception, etc).
    if (docState.maxTermPrefix == null) {
      // Remember a short prefix of the first skipped term; the length is
      // changed only temporarily and restored even if toString() fails.
      final int saved = termBytesRef.length;
      try {
        termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8);
        docState.maxTermPrefix = termBytesRef.toString();
      } finally {
        termBytesRef.length = saved;
      }
    }
    consumer.skippingLongTerm();
    return;
  }

  if (termID >= 0) {
    // New posting
    bytesHash.byteStart(termID);
  }
  termID = recordPosting(termID);

  if (doNextCall) {
    nextPerField.add(postingsArray.textStarts[termID]);
  }
}

/**
 * Shared bookkeeping for both {@code add} entry points: points
 * {@link #intUptos}/{@link #intUptoStart} at the stream addresses of the
 * term and notifies the consumer.
 *
 * @param termID value returned by the bytes hash: non-negative for a term
 *        first seen since the hash was last cleared, otherwise
 *        {@code -(id) - 1} for an already known term
 * @return the resolved, non-negative term id
 */
private int recordPosting(int termID) throws IOException {
  if (termID >= 0) {
    // New posting: first time we are seeing this token since we last
    // flushed the hash.
    // Init stream slices
    if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) {
      intPool.nextBuffer();
    }

    if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE) {
      bytePool.nextBuffer();
    }

    intUptos = intPool.buffer;
    intUptoStart = intPool.intUpto;
    intPool.intUpto += streamCount;

    postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;

    // One first-level slice per stream; the int pool keeps each stream's
    // current write address.
    for (int i = 0; i < streamCount; i++) {
      final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
    }
    postingsArray.byteStarts[termID] = intUptos[intUptoStart];

    consumer.newTerm(termID);
    return termID;
  }

  // Known term: decode the id and locate its stream addresses.
  final int existingID = (-termID) - 1;
  final int intStart = postingsArray.intStarts[existingID];
  intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
  intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
  consumer.addTerm(existingID);
  return existingID;
}

// Int-pool buffer and offset within it holding the stream write addresses
// of the term most recently passed to add.
int[] intUptos;
int intUptoStart;

/**
 * Appends one byte to a stream of the term most recently passed to
 * {@code add}.
 *
 * @param stream stream index
 * @param b the byte to append
 */
void writeByte(int stream, byte b) {
  int upto = intUptos[intUptoStart + stream];
  byte[] bytes = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT];
  assert bytes != null;
  int offset = upto & ByteBlockPool.BYTE_BLOCK_MASK;
  if (bytes[offset] != 0) {
    // End of slice; allocate a new one
    offset = bytePool.allocSlice(bytes, offset);
    bytes = bytePool.buffer;
    intUptos[intUptoStart + stream] = offset + bytePool.byteOffset;
  }
  bytes[offset] = b;
  (intUptos[intUptoStart + stream])++;
}

/**
 * Appends {@code len} bytes of {@code b}, starting at {@code offset}, to a
 * stream, one byte at a time.
 */
public void writeBytes(int stream, byte[] b, int offset, int len) {
  // TODO: optimize
  final int end = offset + len;
  for (int i = offset; i < end; i++) {
    writeByte(stream, b[i]);
  }
}
/**
 * Appends {@code i} to a stream in variable-length form: seven bits per
 * byte, low-order bits first, with the high bit set on every byte except
 * the last.
 *
 * @param stream stream index; must be less than {@code streamCount}
 * @param i the value to write
 */
void writeVInt(int stream, int i) {
  assert stream < streamCount;
  while ((i & ~0x7F) != 0) {
    writeByte(stream, (byte) ((i & 0x7f) | 0x80));
    i >>>= 7;
  }
  writeByte(stream, (byte) i);
}

/** Finishes the consumer and then the chained hash, if any. */
@Override
void finish() throws IOException {
  consumer.finish();
  if (nextPerField != null) {
    nextPerField.finish();
  }
}

/**
 * {@link BytesStartArray} that hands the bytes hash the {@code textStarts}
 * array of the owning field's {@link ParallelPostingsArray}, creating,
 * growing and releasing that postings array on demand and recording the
 * size changes in {@code bytesUsed}.
 */
private static final class PostingsBytesStartArray extends BytesStartArray {

  private final TermsHashPerField perField;
  private final AtomicLong bytesUsed;

  private PostingsBytesStartArray(TermsHashPerField perField, AtomicLong bytesUsed) {
    this.perField = perField;
    this.bytesUsed = bytesUsed;
  }

  /**
   * Creates the postings array (initial size 2) if the field has none and
   * returns its {@code textStarts}.
   */
  @Override
  public int[] init() {
    if (perField.postingsArray == null) {
      perField.postingsArray = perField.consumer.createPostingsArray(2);
      // Multiply in long: an int product could overflow before being widened.
      bytesUsed.addAndGet((long) perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
    }
    return perField.postingsArray.textStarts;
  }

  /**
   * Replaces the field's postings array with a grown copy, accounts for
   * the added postings and returns the new {@code textStarts}.
   */
  @Override
  public int[] grow() {
    final int oldSize = perField.postingsArray.size;
    final ParallelPostingsArray grown = perField.postingsArray.grow();
    perField.postingsArray = grown;
    // Multiply in long: an int product could overflow before being widened.
    bytesUsed.addAndGet((long) grown.bytesPerPosting() * (grown.size - oldSize));
    return grown.textStarts;
  }

  /**
   * Drops the field's postings array, subtracting its size from the
   * counter.
   *
   * @return always {@code null}
   */
  @Override
  public int[] clear() {
    if (perField.postingsArray != null) {
      // Multiply in long: an int product could overflow before being widened.
      bytesUsed.addAndGet(-((long) perField.postingsArray.size * perField.postingsArray.bytesPerPosting()));
      perField.postingsArray = null;
    }
    return null;
  }

  @Override
  public AtomicLong bytesUsed() {
    return bytesUsed;
  }
}

// Closes the enclosing class, whose header precedes this section.
}