package org.apache.lucene.index;

/*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Transparent access to the vector space model,
 * either via TermFreqVector or by resolving it from the inverted index.
 * <p>
 * Resolving a term vector from a large index can be a time consuming process,
 * because every term of the field has to be visited.
 * <p>
 * Warning! This class is not thread safe! Instances keep reusable scratch
 * state between calls.
 */
public class TermVectorAccessor {

  public TermVectorAccessor() {
  }

  /**
   * Instance reused to save garbage collector some time
   */
  private final TermVectorMapperDecorator decoratedMapper = new TermVectorMapperDecorator();

  /**
   * Visits the TermVectorMapper and populates it with terms available for a given document,
   * either via a vector created at index time or by resolving them from the inverted index.
   *
   * @param indexReader    Index source
   * @param documentNumber Source document to access
   * @param fieldName      Field to resolve
   * @param mapper         Mapper to be mapped with data
   * @throws IOException if reading from the index fails
   */
  public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {

    fieldName = StringHelper.intern(fieldName);

    decoratedMapper.decorated = mapper;
    decoratedMapper.termVectorStored = false;

    try {
      // The decorator flips termVectorStored as soon as the reader calls
      // setExpectations on it, i.e. when a stored vector is being delivered.
      indexReader.getTermFreqVector(documentNumber, fieldName, decoratedMapper);
    } finally {
      // Do not keep the caller's mapper reachable through this long-lived accessor.
      decoratedMapper.decorated = null;
    }

    if (!decoratedMapper.termVectorStored) {
      mapper.setDocumentNumber(documentNumber);
      build(indexReader, fieldName, mapper, documentNumber);
    }
  }

  /** Instance reused to save garbage collector some time */
  private List<BytesRef> tokens;

  /** Instance reused to save garbage collector some time */
  private List<int[]> positions;

  /** Instance reused to save garbage collector some time */
  private List<Integer> frequencies;


  /**
   * Populates the mapper with terms available for the given field in a document
   * by resolving the inverted index.
   * <p>
   * Every term of the field is enumerated and its postings are advanced to the
   * requested document; matching terms are buffered and then handed to the mapper.
   * If the mapper wants positions but the postings do not provide them,
   * the terms are still mapped, with <code>null</code> positions.
   *
   * @param indexReader    Index source
   * @param field          interned field name
   * @param mapper         Mapper to be mapped with data
   * @param documentNumber Source document to access
   * @throws IOException if reading from the index fails
   */
  private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {

    if (tokens == null) {
      tokens = new ArrayList<BytesRef>(500);
      positions = new ArrayList<int[]>(500);
      frequencies = new ArrayList<Integer>(500);
    } else {
      tokens.clear();
      frequencies.clear();
      positions.clear();
    }

    final Bits delDocs = MultiFields.getDeletedDocs(indexReader);

    final Terms terms = MultiFields.getTerms(indexReader, field);
    if (terms == null) {
      // Nothing to report for this field.
      return;
    }

    final boolean wantPositions = !mapper.isIgnoringPositions();
    boolean positionsMissing = false;
    boolean anyTerms = false;

    final TermsEnum termsEnum = terms.iterator();
    DocsEnum docsReuse = null;
    DocsAndPositionsEnum postingsReuse = null;

    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      anyTerms = true;

      DocsAndPositionsEnum postings = null;
      if (wantPositions) {
        postings = termsEnum.docsAndPositions(delDocs, postingsReuse);
        if (postings == null) {
          // docsAndPositions yields null when positions were not indexed;
          // previously this led to a NullPointerException on advance().
          positionsMissing = true;
        } else {
          postingsReuse = postings;
        }
      }

      final DocsEnum docs;
      if (postings != null) {
        docs = postings;
      } else {
        docsReuse = termsEnum.docs(delDocs, docsReuse);
        docs = docsReuse;
      }

      if (docs.advance(documentNumber) != documentNumber) {
        // This term does not occur in the requested document.
        continue;
      }

      final int freq = docs.freq();
      frequencies.add(Integer.valueOf(freq));
      // Copy the term: the enum hands out the same BytesRef instance it
      // got from next(), so the buffered value must not alias it.
      tokens.add(new BytesRef(text));

      if (postings != null) {
        final int[] termPositions = new int[freq];
        for (int i = 0; i < termPositions.length; i++) {
          termPositions[i] = postings.nextPosition();
        }
        positions.add(termPositions);
      } else {
        positions.add(null);
      }
    }

    if (anyTerms) {
      // accept() has already announced the document number; the call is
      // repeated here to keep the established call sequence towards the mapper.
      mapper.setDocumentNumber(documentNumber);
      mapper.setExpectations(field, tokens.size(), false, wantPositions && !positionsMissing);
      for (int i = 0; i < tokens.size(); i++) {
        mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
      }
    }
  }


  /**
   * Forwards every call to the wrapped mapper and records whether
   * {@link #setExpectations} was ever invoked, which {@link TermVectorAccessor#accept}
   * uses to tell whether a stored term vector was delivered.
   */
  private static class TermVectorMapperDecorator extends TermVectorMapper {

    /** Mapper receiving the forwarded calls; set by the enclosing class before use. */
    private TermVectorMapper decorated;

    @Override
    public boolean isIgnoringPositions() {
      return decorated.isIgnoringPositions();
    }

    @Override
    public boolean isIgnoringOffsets() {
      return decorated.isIgnoringOffsets();
    }

    /** Becomes true once setExpectations has been called on this decorator. */
    private boolean termVectorStored = false;

    @Override
    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
      decorated.setExpectations(field, numTerms, storeOffsets, storePositions);
      termVectorStored = true;
    }

    @Override
    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
      decorated.map(term, frequency, offsets, positions);
    }

    @Override
    public void setDocumentNumber(int documentNumber) {
      decorated.setDocumentNumber(documentNumber);
    }
  }

}