package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
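/** Writes the term vectors for each document to the term vectors
 *  index (tvx), documents (tvd) and fields (tvf) files. Each
 *  document's vectors are buffered in RAM (as {@link PerDoc}
 *  instances) and appended to the real outputs, in docID order,
 *  as documents finish. */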
final class TermVectorsTermsWriter extends TermsHashConsumer {

  final DocumentsWriter docWriter;

  // Recycled PerDoc instances, so we don't allocate a new one per document
  PerDoc[] docFreeList = new PerDoc[1];
  int freeCount;

  // Term vector output files for the in-progress segment:
  // tvx (index), tvd (documents), tvf (fields)
  IndexOutput tvx;
  IndexOutput tvd;
  IndexOutput tvf;

  // Next docID we expect to see in finishDocument
  int lastDocID;

  boolean hasVectors;

  public TermVectorsTermsWriter(DocumentsWriter docWriter) {
    this.docWriter = docWriter;
  }
  @Override
  public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
    return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
  }
  @Override
  synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
    if (tvx != null) {
      // At least one doc in this run had term vectors enabled
      fill(state.numDocs);
      IOUtils.close(tvx, tvf, tvd);
      tvx = tvd = tvf = null;
      assert state.segmentName != null;
      String idxName = IndexFileNames.segmentFileName(state.segmentName, IndexFileNames.VECTORS_INDEX_EXTENSION);
      // tvx holds a 4-byte format header plus two longs (tvd and tvf
      // pointers) per document, so its length is fully determined by numDocs:
      if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
        throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
      }
      lastDocID = 0;
      state.hasVectors = hasVectors;
      hasVectors = false;
    }

    for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
      for (final TermsHashConsumerPerField field : entry.getValue()) {
        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
        perField.termsHashPerField.reset();
        perField.shrinkHash();
      }

      TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
      perThread.termsHashPerThread.reset(true);
    }
  }
  // Number of PerDoc instances we have ever allocated; used to size
  // docFreeList so that every outstanding instance can be recycled
  int allocCount;

  synchronized PerDoc getPerDoc() {
    if (freeCount == 0) {
      allocCount++;
      if (allocCount > docFreeList.length) {
        // Grow our free list up front to make sure we have
        // enough space to recycle all outstanding PerDoc
        // instances
        assert allocCount == 1+docFreeList.length;
        docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      }
      return new PerDoc();
    } else {
      return docFreeList[--freeCount];
    }
  }
  /** Fills in no-term-vectors for all docs we haven't seen
   *  since the last doc that had term vectors. */
  void fill(int docID) throws IOException {
    if (lastDocID < docID) {
      final long tvfPosition = tvf.getFilePointer();
      while(lastDocID < docID) {
        // For each skipped doc, write a tvd entry with zero fields;
        // the tvf pointer does not advance since nothing is written there
        tvx.writeLong(tvd.getFilePointer());
        tvd.writeVInt(0);
        tvx.writeLong(tvfPosition);
        lastDocID++;
      }
    }
  }
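  // Worked example of the tvx size invariant verified in flush(): after
  // the 4-byte format header, every document (with or without vectors)
  // contributes exactly two longs (16 bytes) to tvx, so a segment with
  // 3 docs always yields a tvx file of 4 + 3*16 = 52 bytes.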
  synchronized void initTermVectorsWriter() throws IOException {
    if (tvx == null) {
      boolean success = false;
      try {
        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        hasVectors = true;
        tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
        tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
        tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));

        // Each file starts with a 4-byte format version header
        tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
        }
      }

      lastDocID = 0;
    }
  }
  synchronized void finishDocument(PerDoc perDoc) throws IOException {
    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");

    initTermVectorsWriter();

    // Write filler entries for any docs without term vectors
    // between the last doc we wrote and this one:
    fill(perDoc.docID);

    // Append term vectors to the real outputs:
    tvx.writeLong(tvd.getFilePointer());
    tvx.writeLong(tvf.getFilePointer());
    tvd.writeVInt(perDoc.numVectorFields);
    if (perDoc.numVectorFields > 0) {
      for(int i=0;i<perDoc.numVectorFields;i++) {
        tvd.writeVInt(perDoc.fieldNumbers[i]);
      }
      assert 0 == perDoc.fieldPointers[0];
      // Field pointers are delta-coded as VLongs to save space
      long lastPos = perDoc.fieldPointers[0];
      for(int i=1;i<perDoc.numVectorFields;i++) {
        long pos = perDoc.fieldPointers[i];
        tvd.writeVLong(pos-lastPos);
        lastPos = pos;
      }
      perDoc.perDocTvf.writeTo(tvf);
      perDoc.numVectorFields = 0;
    }

    assert lastDocID == perDoc.docID: "lastDocID=" + lastDocID + " perDoc.docID=" + perDoc.docID;

    lastDocID++;

    perDoc.reset();
    free(perDoc);
    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
  }
  @Override
  public void abort() {
    hasVectors = false;

    try {
      IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
    } catch (IOException e) {
      // cannot happen since we suppress exceptions
      throw new RuntimeException(e);
    }

    // Best-effort delete of any partially written files:
    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
    } catch (IOException ignored) {
    }
    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    } catch (IOException ignored) {
    }
    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));
    } catch (IOException ignored) {
    }

    tvx = tvd = tvf = null;
    lastDocID = 0;
  }
  synchronized void free(PerDoc doc) {
    assert freeCount < docFreeList.length;
    docFreeList[freeCount++] = doc;
  }
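  /** Per-document state: buffers one document's term vector bytes
   *  (via {@link RAMOutputStream}) plus the field numbers and tvf
   *  file pointers, until {@link #finishDocument} appends them to
   *  the real outputs. Instances are pooled via getPerDoc/free. */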
  class PerDoc extends DocumentsWriter.DocWriter {

    final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
    RAMOutputStream perDocTvf = new RAMOutputStream(buffer);

    int numVectorFields;

    int[] fieldNumbers = new int[1];
    long[] fieldPointers = new long[1];

    void reset() {
      perDocTvf.reset();
      buffer.recycle();
      numVectorFields = 0;
    }

    @Override
    void abort() {
      reset();
      free(this);
    }

    void addField(final int fieldNumber) {
      if (numVectorFields == fieldNumbers.length) {
        fieldNumbers = ArrayUtil.grow(fieldNumbers);
      }
      if (numVectorFields == fieldPointers.length) {
        fieldPointers = ArrayUtil.grow(fieldPointers);
      }
      // Record this field's number and where its vector data
      // starts in the per-document tvf buffer
      fieldNumbers[numVectorFields] = fieldNumber;
      fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
      numVectorFields++;
    }

    @Override
    public long sizeInBytes() {
      return buffer.getSizeInBytes();
    }

    @Override
    public void finish() throws IOException {
      finishDocument(this);
    }
  }
}