package org.apache.lucene.index;
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Transparent access to the vector space model,
 * either via TermFreqVector or by resolving it from the inverted index.
 * <p>
 * Resolving a term vector from a large index can be a time consuming process,
 * because every term of the requested field is visited.
 * <p>
 * Warning! This class is not thread safe: it reuses internal buffers and a
 * mapper decorator between calls to {@link #accept}.
 */
public class TermVectorAccessor {

  /** Initial capacity of the reusable per-document buffers. */
  private static final int INITIAL_BUFFER_CAPACITY = 500;

  /** Instance reused to save garbage collector some time. */
  private final TermVectorMapperDecorator decoratedMapper = new TermVectorMapperDecorator();

  /** Terms found for the current document; reused between calls. */
  private List<BytesRef> tokens;

  /** Positions per term (a {@code null} entry when not collected); reused between calls. */
  private List<int[]> positions;

  /** Within-document frequency per term; reused between calls. */
  private List<Integer> frequencies;

  public TermVectorAccessor() {
  }

  /**
   * Visits the TermVectorMapper and populates it with terms available for a given document,
   * either via a vector created at index time or by resolving them from the inverted index.
   * <p>
   * The reader is first asked to feed a stored term vector to the mapper. If the mapper's
   * {@code setExpectations} was never invoked during that call, no stored vector is assumed
   * to exist and the terms are resolved from the inverted index instead.
   *
   * @param indexReader    Index source
   * @param documentNumber Source document to access
   * @param fieldName      Field to resolve
   * @param mapper         Mapper to be mapped with data
   * @throws IOException propagated from the index reader or the postings enumerations
   */
  public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {
    fieldName = StringHelper.intern(fieldName);

    decoratedMapper.decorated = mapper;
    decoratedMapper.termVectorStored = false;
    try {
      indexReader.getTermFreqVector(documentNumber, fieldName, decoratedMapper);
    } finally {
      // Do not keep the caller's mapper reachable through the reused decorator.
      decoratedMapper.decorated = null;
    }

    if (!decoratedMapper.termVectorStored) {
      mapper.setDocumentNumber(documentNumber);
      build(indexReader, fieldName, mapper, documentNumber);
    }
  }

  /**
   * Populates the mapper with terms available for the given field in a document
   * by resolving the inverted index.
   * <p>
   * Every term of the field is enumerated and its postings are advanced to
   * {@code documentNumber}; matching terms are buffered and then handed to the mapper.
   * Offsets are never supplied. If positions are requested but the index cannot provide
   * a positions enum for some term, the mapper is told that positions are not stored and
   * receives {@code null} positions for every term.
   * <p>
   * The caller is expected to have already invoked
   * {@code mapper.setDocumentNumber(documentNumber)}.
   *
   * @param indexReader    index source
   * @param field          interned field name
   * @param mapper         mapper receiving the resolved terms
   * @param documentNumber document to resolve
   * @throws IOException propagated from the index reader or the postings enumerations
   */
  private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
    if (tokens == null) {
      tokens = new ArrayList<BytesRef>(INITIAL_BUFFER_CAPACITY);
      positions = new ArrayList<int[]>(INITIAL_BUFFER_CAPACITY);
      frequencies = new ArrayList<Integer>(INITIAL_BUFFER_CAPACITY);
    } else {
      tokens.clear();
      frequencies.clear();
      positions.clear();
    }

    final Bits delDocs = MultiFields.getDeletedDocs(indexReader);
    final Terms terms = MultiFields.getTerms(indexReader, field);
    if (terms == null) {
      return;
    }

    final boolean wantPositions = !mapper.isIgnoringPositions();
    final TermsEnum termsEnum = terms.iterator();

    // Separate reuse slots so a positions enum is never offered as reuse to docs().
    DocsEnum docsReuse = null;
    DocsAndPositionsEnum postingsReuse = null;

    boolean anyTerms = false;
    boolean positionsMissing = false;

    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      anyTerms = true;

      DocsAndPositionsEnum termPostings = null;
      if (wantPositions) {
        termPostings = termsEnum.docsAndPositions(delDocs, postingsReuse);
        if (termPostings == null) {
          // No positions enum available for this term (e.g. positions not indexed).
          positionsMissing = true;
        } else {
          postingsReuse = termPostings;
        }
      }

      final DocsEnum docs;
      if (termPostings != null) {
        docs = termPostings;
      } else {
        docs = docsReuse = termsEnum.docs(delDocs, docsReuse);
      }
      if (docs == null) {
        // Defensive: nothing to advance for this term.
        continue;
      }

      if (docs.advance(documentNumber) != documentNumber) {
        continue;
      }

      final int freq = docs.freq();
      frequencies.add(Integer.valueOf(freq));
      // Copy the term bytes; presumably the enum reuses its BytesRef between next() calls.
      tokens.add(new BytesRef(text));

      if (termPostings != null) {
        final int[] termPositions = new int[freq];
        for (int i = 0; i < termPositions.length; i++) {
          termPositions[i] = termPostings.nextPosition();
        }
        positions.add(termPositions);
      } else {
        positions.add(null);
      }
    }

    if (anyTerms) {
      final boolean storePositions = wantPositions && !positionsMissing;
      mapper.setExpectations(field, tokens.size(), false, storePositions);
      for (int i = 0; i < tokens.size(); i++) {
        // Keep the mapped data consistent with what setExpectations announced.
        final int[] termPositions = storePositions ? positions.get(i) : null;
        mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, termPositions);
      }
    }
  }

  /**
   * Forwards every call to the decorated mapper and records whether
   * {@link #setExpectations} was invoked, which {@code accept} uses as the signal
   * that a stored term vector was delivered.
   */
  private static class TermVectorMapperDecorator extends TermVectorMapper {

    /** Mapper currently being fed; only set for the duration of {@code accept}. */
    private TermVectorMapper decorated;

    /** Set to true once {@link #setExpectations} has been called. */
    private boolean termVectorStored = false;

    @Override
    public boolean isIgnoringPositions() {
      return decorated.isIgnoringPositions();
    }

    @Override
    public boolean isIgnoringOffsets() {
      return decorated.isIgnoringOffsets();
    }

    @Override
    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
      decorated.setExpectations(field, numTerms, storeOffsets, storePositions);
      termVectorStored = true;
    }

    @Override
    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
      decorated.map(term, frequency, offsets, positions);
    }

    @Override
    public void setDocumentNumber(int documentNumber) {
      decorated.setDocumentNumber(documentNumber);
    }
  }
}