package org.apache.lucene.index;

import org.apache.lucene.util.BytesRef;

/*
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * The TermVectorMapper can be used to map Term Vectors into your own
 * structure instead of the parallel array structure used by
 * {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
 * <p>
 * It is up to the implementation to make sure it is thread-safe.
 */
public abstract class TermVectorMapper {

  // Both flags are fixed at construction time and never reassigned.
  private final boolean ignoringPositions;
  private final boolean ignoringOffsets;

  /**
   * Creates a mapper that ignores neither positions nor offsets.
   */
  protected TermVectorMapper() {
    this(false, false);
  }

  /**
   * Creates a mapper with explicit choices about which stored data to ignore.
   *
   * @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if they are stored
   * @param ignoringOffsets similar to ignoringPositions
   */
  protected TermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets) {
    this.ignoringPositions = ignoringPositions;
    this.ignoringOffsets = ignoringOffsets;
  }

  /**
   * Tell the mapper what to expect in regards to field, number of terms, offset and position storage.
   * This method will be called once before retrieving the vector for a field.
   * <p>
   * This method will be called before {@link #map(BytesRef,int,TermVectorOffsetInfo[],int[])}.
   *
   * @param field The field the vector is for
   * @param numTerms The number of terms that need to be mapped
   * @param storeOffsets true if the mapper should expect offset information
   * @param storePositions true if the mapper should expect positions info
   */
  public abstract void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions);

  /**
   * Map the Term Vector information into your own structure.
   *
   * @param term The term to add to the vector
   * @param frequency The frequency of the term in the document
   * @param offsets null if the offset is not specified, otherwise the offset into the field of the term
   * @param positions null if the position is not specified, otherwise the position in the field of the term
   */
  public abstract void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions);

  /**
   * Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they
   * can be skipped over. Derived classes should pass true to
   * {@link #TermVectorMapper(boolean, boolean)} if they want to ignore positions. The default
   * is false, meaning positions will be loaded if they are stored.
   *
   * @return the value supplied at construction; false when the no-argument constructor was used
   */
  public boolean isIgnoringPositions() {
    return ignoringPositions;
  }

  /**
   * Same principle as {@link #isIgnoringPositions()}, but applied to offsets. false by default.
   *
   * @see #isIgnoringPositions()
   * @return the value supplied at construction; false when the no-argument constructor was used
   */
  public boolean isIgnoringOffsets() {
    return ignoringOffsets;
  }

  /**
   * Passes down the index of the document whose term vector is currently being mapped,
   * once for each top level call to a term vector reader.
   * <p>
   * Default implementation IGNORES the document number. Override if your implementation needs the document number.
   * <p>
   * NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
   *
   * @param documentNumber index of document currently being mapped
   */
  public void setDocumentNumber(int documentNumber) {
    // Intentionally empty: the base implementation does not use the document number.
  }
}