package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
final class TermVectorsTermsWriter extends TermsHashConsumer {
final DocumentsWriter docWriter;
TermVectorsWriter termVectorsWriter;
PerDoc[] docFreeList = new PerDoc[1];
int freeCount;
IndexOutput tvx;
IndexOutput tvd;
IndexOutput tvf;
int lastDocID;
public TermVectorsTermsWriter(DocumentsWriter docWriter) {
this.docWriter = docWriter;
}
@Override
public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
}
@Override
synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
if (tvx != null) {
if (state.numDocsInStore > 0)
// In case there are some final documents that we
// didn't see (because they hit a non-aborting exception):
fill(state.numDocsInStore - docWriter.getDocStoreOffset());
tvx.flush();
tvd.flush();
tvf.flush();
}
for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
for (final TermsHashConsumerPerField field : entry.getValue() ) {
TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
perField.termsHashPerField.reset();
perField.shrinkHash();
}
TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
perThread.termsHashPerThread.reset(true);
}
}
@Override
synchronized void closeDocStore(final SegmentWriteState state) throws IOException {
if (tvx != null) {
// At least one doc in this run had term vectors
// enabled
fill(state.numDocsInStore - docWriter.getDocStoreOffset());
tvx.close();
tvf.close();
tvd.close();
tvx = null;
assert state.docStoreSegmentName != null;
String idxName = IndexFileNames.segmentFileName(state.docStoreSegmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
if (4+((long) state.numDocsInStore)*16 != state.directory.fileLength(idxName))
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
String fldName = IndexFileNames.segmentFileName(state.docStoreSegmentName, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
String docName = IndexFileNames.segmentFileName(state.docStoreSegmentName, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
state.flushedFiles.add(idxName);
state.flushedFiles.add(fldName);
state.flushedFiles.add(docName);
docWriter.removeOpenFile(idxName);
docWriter.removeOpenFile(fldName);
docWriter.removeOpenFile(docName);
lastDocID = 0;
}
}
int allocCount;
synchronized PerDoc getPerDoc() {
if (freeCount == 0) {
allocCount++;
if (allocCount > docFreeList.length) {
// Grow our free list up front to make sure we have
// enough space to recycle all outstanding PerDoc
// instances
assert allocCount == 1+docFreeList.length;
docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
return new PerDoc();
} else
return docFreeList[--freeCount];
}
/** Fills in no-term-vectors for all docs we haven't seen
* since the last doc that had term vectors. */
void fill(int docID) throws IOException {
final int docStoreOffset = docWriter.getDocStoreOffset();
final int end = docID+docStoreOffset;
if (lastDocID < end) {
final long tvfPosition = tvf.getFilePointer();
while(lastDocID < end) {
tvx.writeLong(tvd.getFilePointer());
tvd.writeVInt(0);
tvx.writeLong(tvfPosition);
lastDocID++;
}
}
}
synchronized void initTermVectorsWriter() throws IOException {
if (tvx == null) {
final String docStoreSegment = docWriter.getDocStoreSegment();
if (docStoreSegment == null)
return;
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
String idxName = IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
String docName = IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
String fldName = IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
tvx = docWriter.directory.createOutput(idxName);
tvd = docWriter.directory.createOutput(docName);
tvf = docWriter.directory.createOutput(fldName);
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
docWriter.addOpenFile(idxName);
docWriter.addOpenFile(fldName);
docWriter.addOpenFile(docName);
lastDocID = 0;
}
}
synchronized void finishDocument(PerDoc perDoc) throws IOException {
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
initTermVectorsWriter();
fill(perDoc.docID);
// Append term vectors to the real outputs:
tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
tvd.writeVInt(perDoc.numVectorFields);
if (perDoc.numVectorFields > 0) {
for(int i=0;i<perDoc.numVectorFields;i++)
tvd.writeVInt(perDoc.fieldNumbers[i]);
assert 0 == perDoc.fieldPointers[0];
long lastPos = perDoc.fieldPointers[0];
for(int i=1;i<perDoc.numVectorFields;i++) {
long pos = perDoc.fieldPointers[i];
tvd.writeVLong(pos-lastPos);
lastPos = pos;
}
perDoc.perDocTvf.writeTo(tvf);
perDoc.numVectorFields = 0;
}
assert lastDocID == perDoc.docID + docWriter.getDocStoreOffset();
lastDocID++;
perDoc.reset();
free(perDoc);
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
}
public boolean freeRAM() {
// We don't hold any state beyond one doc, so we don't
// free persistent RAM here
return false;
}
@Override
public void abort() {
if (tvx != null) {
try {
tvx.close();
} catch (Throwable t) {
}
tvx = null;
}
if (tvd != null) {
try {
tvd.close();
} catch (Throwable t) {
}
tvd = null;
}
if (tvf != null) {
try {
tvf.close();
} catch (Throwable t) {
}
tvf = null;
}
lastDocID = 0;
}
synchronized void free(PerDoc doc) {
assert freeCount < docFreeList.length;
docFreeList[freeCount++] = doc;
}
class PerDoc extends DocumentsWriter.DocWriter {
final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
RAMOutputStream perDocTvf = new RAMOutputStream(buffer);
int numVectorFields;
int[] fieldNumbers = new int[1];
long[] fieldPointers = new long[1];
void reset() {
perDocTvf.reset();
buffer.recycle();
numVectorFields = 0;
}
@Override
void abort() {
reset();
free(this);
}
void addField(final int fieldNumber) {
if (numVectorFields == fieldNumbers.length) {
fieldNumbers = ArrayUtil.grow(fieldNumbers);
}
if (numVectorFields == fieldPointers.length) {
fieldPointers = ArrayUtil.grow(fieldPointers);
}
fieldNumbers[numVectorFields] = fieldNumber;
fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
numVectorFields++;
}
@Override
public long sizeInBytes() {
return buffer.getSizeInBytes();
}
@Override
public void finish() throws IOException {
finishDocument(this);
}
}
}