package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
import java.util.*;
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
* @lucene.experimental
*/
public final class FieldInfos {
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
// whenever you add a new format, make it 1 smaller (negative version logic)!
static final int FORMAT_CURRENT = FORMAT_START;
static final int FORMAT_MINIMUM = FORMAT_START;
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
private int format;
public FieldInfos() { }
/**
* Construct a FieldInfos object using the directory and the name of the file
* IndexInput
* @param d The directory to open the IndexInput from
* @param name The name of the file to open the IndexInput from in the Directory
* @throws IOException
*/
public FieldInfos(Directory d, String name) throws IOException {
IndexInput input = d.openInput(name);
try {
read(input, name);
} finally {
input.close();
}
}
/**
* Returns a deep clone of this FieldInfos instance.
*/
@Override
synchronized public Object clone() {
FieldInfos fis = new FieldInfos();
final int numField = byNumber.size();
for(int i=0;i<numField;i++) {
FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
fis.byNumber.add(fi);
fis.byName.put(fi.name, fi);
}
return fis;
}
/** Adds field info for a Document. */
synchronized public void add(Document doc) {
List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) {
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
}
}
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
final int numFields = byNumber.size();
for(int i=0;i<numFields;i++) {
final FieldInfo fi = fieldInfo(i);
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
}
}
return false;
}
/**
* Add fields that are indexed. Whether they have termvectors has to be specified.
*
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
* @param storePositionWithTermVector true if positions should be stored.
* @param storeOffsetWithTermVector true if offsets should be stored
*/
synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector) {
for (String name : names) {
add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
/**
* Assumes the fields are not storing term vectors.
*
* @param names The names of the fields
* @param isIndexed Whether the fields are indexed or not
*
* @see #add(String, boolean)
*/
synchronized public void add(Collection<String> names, boolean isIndexed) {
for (String name : names) {
add(name, isIndexed);
}
}
/**
* Calls 5 parameter add with false for all TermVector parameters.
*
* @param name The name of the Fieldable
* @param isIndexed true if the field is indexed
* @see #add(String, boolean, boolean, boolean, boolean)
*/
synchronized public void add(String name, boolean isIndexed) {
add(name, isIndexed, false, false, false, false);
}
/**
* Calls 5 parameter add with false for term vector positions and offsets.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
add(name, isIndexed, storeTermVector, false, false, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
add(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
* @param storePayloads true if payloads should be stored for this field
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
*/
synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
return fi;
}
private FieldInfo addInternal(String name, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
name = StringHelper.intern(name);
FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
byNumber.add(fi);
byName.put(name, fi);
return fi;
}
public int fieldNumber(String fieldName) {
FieldInfo fi = fieldInfo(fieldName);
return (fi != null) ? fi.number : -1;
}
public FieldInfo fieldInfo(String fieldName) {
return byName.get(fieldName);
}
/**
* Return the fieldName identified by its number.
*
* @param fieldNumber
* @return the fieldName or an empty string when the field
* with the given number doesn't exist.
*/
public String fieldName(int fieldNumber) {
FieldInfo fi = fieldInfo(fieldNumber);
return (fi != null) ? fi.name : "";
}
/**
* Return the fieldinfo object referenced by the fieldNumber.
* @param fieldNumber
* @return the FieldInfo object or null when the given fieldNumber
* doesn't exist.
*/
public FieldInfo fieldInfo(int fieldNumber) {
return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
}
public int size() {
return byNumber.size();
}
public boolean hasVectors() {
boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
if (fieldInfo(i).storeTermVector) {
hasVectors = true;
break;
}
}
return hasVectors;
}
public void write(Directory d, String name) throws IOException {
IndexOutput output = d.createOutput(name);
try {
write(output);
} finally {
output.close();
}
}
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
for (int i = 0; i < size(); i++) {
FieldInfo fi = fieldInfo(i);
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
if (fi.omitNorms) bits |= OMIT_NORMS;
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
output.writeString(fi.name);
output.writeByte(bits);
}
}
private void read(IndexInput input, String fileName) throws IOException {
format = input.readVInt();
if (format > FORMAT_MINIMUM) {
throw new IndexFormatTooOldException(fileName, format, FORMAT_MINIMUM, FORMAT_CURRENT);
}
if (format < FORMAT_CURRENT) {
throw new IndexFormatTooNewException(fileName, format, FORMAT_MINIMUM, FORMAT_CURRENT);
}
final int size = input.readVInt(); //read in the size
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length());
}
}
}