/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.lucene.util.ArrayUtil;

/**
 * Collection of {@link FieldInfo}s (accessible by number or by name).
 * @lucene.experimental
 */
public class FieldInfos implements Iterable<FieldInfo> {
  private final boolean hasFreq;
  private final boolean hasProx;
  private final boolean hasPayloads;
  private final boolean hasOffsets;
  private final boolean hasVectors;
  private final boolean hasNorms;
  private final boolean hasDocValues;
  private final boolean hasPointValues;

  // used only by fieldInfo(int); exactly one of the two is non-null
  private final FieldInfo[] byNumberTable; // contiguous
  private final SortedMap<Integer,FieldInfo> byNumberMap; // sparse

  private final Map<String,FieldInfo> byName = new HashMap<>();
  private final Collection<FieldInfo> values; // for an unmodifiable iterator

  /**
   * Constructs a new FieldInfos from an array of FieldInfo objects
   *
   * @param infos the field infos to index by number and by name
   * @throws IllegalArgumentException if a field number is negative, or if two
   *         entries share the same field number or the same field name
   */
  public FieldInfos(FieldInfo[] infos) {
    boolean hasVectors = false;
    boolean hasProx = false;
    boolean hasPayloads = false;
    boolean hasOffsets = false;
    boolean hasFreq = false;
    boolean hasNorms = false;
    boolean hasDocValues = false;
    boolean hasPointValues = false;

    TreeMap<Integer, FieldInfo> byNumber = new TreeMap<>();
    for (FieldInfo info : infos) {
      if (info.number < 0) {
        throw new IllegalArgumentException("illegal field number: " + info.number + " for field " + info.name);
      }
      FieldInfo previous = byNumber.put(info.number, info);
      if (previous != null) {
        throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number);
      }
      previous = byName.put(info.name, info);
      if (previous != null) {
        throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name);
      }

      hasVectors |= info.hasVectors();
      hasProx |= info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      // NOTE(review): this is set for every option other than DOCS, which also
      // covers IndexOptions.NONE (the value Builder.getOrAdd creates fields with).
      // Left unchanged because callers may rely on it -- confirm whether intended.
      hasFreq |= info.getIndexOptions() != IndexOptions.DOCS;
      hasOffsets |= info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      hasNorms |= info.hasNorms();
      hasDocValues |= info.getDocValuesType() != DocValuesType.NONE;
      hasPayloads |= info.hasPayloads();
      hasPointValues |= (info.getPointDimensionCount() != 0);
    }

    this.hasVectors = hasVectors;
    this.hasProx = hasProx;
    this.hasPayloads = hasPayloads;
    this.hasOffsets = hasOffsets;
    this.hasFreq = hasFreq;
    this.hasNorms = hasNorms;
    this.hasDocValues = hasDocValues;
    this.hasPointValues = hasPointValues;

    // The TreeMap keeps its keys sorted, so the largest field number is its last key.
    Integer max = byNumber.isEmpty() ? null : byNumber.lastKey();

    // Only use TreeMap in the very sparse case (< 1/16th of the numbers are used),
    // because TreeMap uses ~ 64 (32 bit JVM) or 120 (64 bit JVM w/o compressed oops)
    // overall bytes per entry, but array uses 4 (32 bit JVM) or 8
    // (64 bit JVM w/o compressed oops):
    if (max != null && max < ArrayUtil.MAX_ARRAY_LENGTH && max < 16L*byNumber.size()) {
      byNumberMap = null;
      byNumberTable = new FieldInfo[max+1];
      for (Map.Entry<Integer,FieldInfo> entry : byNumber.entrySet()) {
        byNumberTable[entry.getKey()] = entry.getValue();
      }
      // Snapshot the values (already in ascending field number order) instead of
      // keeping a view of the TreeMap: a view would keep the whole TreeMap
      // reachable and defeat the point of using the array.
      Collection<FieldInfo> snapshot = new ArrayList<>(byNumber.values());
      values = Collections.unmodifiableCollection(snapshot);
    } else {
      byNumberMap = byNumber;
      byNumberTable = null;
      // The map is retained anyway, so a view of its values costs nothing extra.
      values = Collections.unmodifiableCollection(byNumber.values());
    }
  }

  /** Returns true if any fields have freqs */
  public boolean hasFreq() {
    return hasFreq;
  }

  /** Returns true if any fields have positions */
  public boolean hasProx() {
    return hasProx;
  }

  /** Returns true if any fields have payloads */
  public boolean hasPayloads() {
    return hasPayloads;
  }

  /** Returns true if any fields have offsets */
  public boolean hasOffsets() {
    return hasOffsets;
  }

  /** Returns true if any fields have vectors */
  public boolean hasVectors() {
    return hasVectors;
  }

  /** Returns true if any fields have norms */
  public boolean hasNorms() {
    return hasNorms;
  }

  /** Returns true if any fields have DocValues */
  public boolean hasDocValues() {
    return hasDocValues;
  }

  /** Returns true if any fields have PointValues */
  public boolean hasPointValues() {
    return hasPointValues;
  }

  /** Returns the number of fields */
  public int size() {
    return byName.size();
  }

  /**
   * Returns an iterator over all the fieldinfo objects present,
   * ordered by ascending field number
   */
  // TODO: what happens if in fact a different order is used?
  @Override
  public Iterator<FieldInfo> iterator() {
    return values.iterator();
  }

  /**
   * Return the fieldinfo object referenced by the field name
   * @return the FieldInfo object or null when the given fieldName
   * doesn't exist.
   */
  public FieldInfo fieldInfo(String fieldName) {
    return byName.get(fieldName);
  }

  /**
   * Return the fieldinfo object referenced by the fieldNumber.
   * @param fieldNumber field's number.
   * @return the FieldInfo object or null when the given fieldNumber
   * doesn't exist.
   * @throws IllegalArgumentException if fieldNumber is negative
   */
  public FieldInfo fieldInfo(int fieldNumber) {
    if (fieldNumber < 0) {
      throw new IllegalArgumentException("Illegal field number: " + fieldNumber);
    }
    if (byNumberTable != null) {
      if (fieldNumber >= byNumberTable.length) {
        return null;
      }
      return byNumberTable[fieldNumber];
    } else {
      return byNumberMap.get(fieldNumber);
    }
  }

  /** Immutable pair of point dimension count and bytes per dimension recorded for a field. */
  static final class FieldDimensions {
    public final int dimensionCount;
    public final int dimensionNumBytes;

    public FieldDimensions(int dimensionCount, int dimensionNumBytes) {
      this.dimensionCount = dimensionCount;
      this.dimensionNumBytes = dimensionNumBytes;
    }
  }

  /**
   * Registry mapping field names to field numbers (and back), together with the
   * doc values type and point dimensions recorded per field name. All
   * non-private methods are {@code synchronized} on this instance.
   */
  static final class FieldNumbers {

    private final Map<Integer,String> numberToName;
    private final Map<String,Integer> nameToNumber;
    // We use this to enforce that a given field never
    // changes DV type, even across segments / IndexWriter
    // sessions:
    private final Map<String,DocValuesType> docValuesType;

    private final Map<String,FieldDimensions> dimensions;

    // TODO: we should similarly catch an attempt to turn
    // norms back on after they were already omitted; today
    // we silently discard the norm but this is badly trappy
    private int lowestUnassignedFieldNumber = -1;

    FieldNumbers() {
      this.nameToNumber = new HashMap<>();
      this.numberToName = new HashMap<>();
      this.docValuesType = new HashMap<>();
      this.dimensions = new HashMap<>();
    }

    /**
     * Returns the global field number for the given field name. If the name
     * does not exist yet it tries to add it with the given preferred field
     * number assigned if possible otherwise the first unassigned field number
     * is used as the field number.
     *
     * <p>All checks run before any map is modified, so a rejected call leaves
     * this instance unchanged.
     *
     * @throws IllegalArgumentException if {@code dvType} or the point dimensions
     *         conflict with what is already recorded for {@code fieldName}
     */
    synchronized int addOrGet(String fieldName, int preferredFieldNumber, DocValuesType dvType, int dimensionCount, int dimensionNumBytes) {
      final boolean hasDocValuesType = dvType != DocValuesType.NONE;
      final boolean hasDimensions = dimensionCount != 0;

      // Phase 1: validate only.
      DocValuesType currentDVType = null;
      if (hasDocValuesType) {
        currentDVType = docValuesType.get(fieldName);
        checkDocValuesType(fieldName, currentDVType, dvType);
      }
      FieldDimensions dims = null;
      if (hasDimensions) {
        dims = dimensions.get(fieldName);
        checkDimensions(fieldName, dims, dimensionCount, dimensionNumBytes);
      }

      // Phase 2: record whatever was not known yet.
      if (hasDocValuesType && currentDVType == null) {
        docValuesType.put(fieldName, dvType);
      }
      if (hasDimensions && dims == null) {
        dimensions.put(fieldName, new FieldDimensions(dimensionCount, dimensionNumBytes));
      }

      Integer fieldNumber = nameToNumber.get(fieldName);
      if (fieldNumber == null) {
        final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber);
        if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) {
          // cool - we can use this number globally
          fieldNumber = preferredBoxed;
        } else {
          // find a new FieldNumber
          while (numberToName.containsKey(++lowestUnassignedFieldNumber)) {
            // might not be up to date - lets do the work once needed
          }
          fieldNumber = lowestUnassignedFieldNumber;
        }
        assert fieldNumber >= 0;
        numberToName.put(fieldNumber, fieldName);
        nameToNumber.put(fieldName, fieldNumber);
      }

      return fieldNumber.intValue();
    }

    /**
     * Checks that {@code number} and {@code name} are mapped to each other and
     * that {@code dvType} does not conflict with the recorded doc values type.
     *
     * @throws IllegalArgumentException if any of these checks fails
     */
    synchronized void verifyConsistent(Integer number, String name, DocValuesType dvType) {
      verifyNumberAndName(number, name);
      checkDocValuesType(name, docValuesType.get(name), dvType);
    }

    /**
     * Checks that {@code number} and {@code name} are mapped to each other and
     * that the given point dimensions equal the recorded ones, if any are recorded.
     *
     * @throws IllegalArgumentException if any of these checks fails
     */
    synchronized void verifyConsistentDimensions(Integer number, String name, int dimensionCount, int dimensionNumBytes) {
      verifyNumberAndName(number, name);
      checkDimensions(name, dimensions.get(name), dimensionCount, dimensionNumBytes);
    }

    /** Throws unless {@code number} maps to {@code name} and {@code name} maps to {@code number}; callers hold the lock. */
    private void verifyNumberAndName(Integer number, String name) {
      if (name.equals(numberToName.get(number)) == false) {
        throw new IllegalArgumentException("field number " + number + " is already mapped to field name \"" + numberToName.get(number) + "\", not \"" + name + "\"");
      }
      if (number.equals(nameToNumber.get(name)) == false) {
        throw new IllegalArgumentException("field name \"" + name + "\" is already mapped to field number \"" + nameToNumber.get(name) + "\", not \"" + number + "\"");
      }
    }

    /** Throws if both types are set (neither null nor NONE for {@code current}, not NONE for {@code requested}) and differ. */
    private static void checkDocValuesType(String name, DocValuesType current, DocValuesType requested) {
      if (requested != DocValuesType.NONE && current != null && current != DocValuesType.NONE && requested != current) {
        throw new IllegalArgumentException("cannot change DocValues type from " + current + " to " + requested + " for field \"" + name + "\"");
      }
    }

    /** Throws if {@code existing} is non-null and differs from the given dimension count or bytes per dimension. */
    private static void checkDimensions(String name, FieldDimensions existing, int dimensionCount, int dimensionNumBytes) {
      if (existing == null) {
        return;
      }
      if (existing.dimensionCount != dimensionCount) {
        throw new IllegalArgumentException("cannot change point dimension count from " + existing.dimensionCount + " to " + dimensionCount + " for field=\"" + name + "\"");
      }
      if (existing.dimensionNumBytes != dimensionNumBytes) {
        throw new IllegalArgumentException("cannot change point numBytes from " + existing.dimensionNumBytes + " to " + dimensionNumBytes + " for field=\"" + name + "\"");
      }
    }

    /**
     * Returns true if the {@code fieldName} exists in the map and is of the
     * same {@code dvType}.
     */
    synchronized boolean contains(String fieldName, DocValuesType dvType) {
      // used by IndexWriter.updateNumericDocValue
      if (!nameToNumber.containsKey(fieldName)) {
        return false;
      } else {
        // only return true if the field has the same dvType as the requested one
        return dvType == docValuesType.get(fieldName);
      }
    }

    /** Returns an unmodifiable snapshot (a copy, not a view) of all known field names. */
    synchronized Set<String> getFieldNames() {
      return Collections.unmodifiableSet(new HashSet<>(nameToNumber.keySet()));
    }

    /** Forgets every field name, number, doc values type and dimension. */
    synchronized void clear() {
      numberToName.clear();
      nameToNumber.clear();
      docValuesType.clear();
      dimensions.clear();
      // Every number is free again, so restart the search for unassigned numbers
      // from the beginning instead of continuing after the old high-water mark.
      lowestUnassignedFieldNumber = -1;
    }

    /**
     * Records {@code dvType} for {@code name} after checking it via
     * {@link #verifyConsistent(Integer, String, DocValuesType)}.
     *
     * @throws IllegalArgumentException if the consistency check fails
     */
    synchronized void setDocValuesType(int number, String name, DocValuesType dvType) {
      verifyConsistent(number, name, dvType);
      docValuesType.put(name, dvType);
    }

    /**
     * Records the point dimensions for {@code name} after checking the upper
     * bounds and consistency with any previously recorded dimensions.
     *
     * @throws IllegalArgumentException if a value exceeds its {@link PointValues}
     *         maximum or the consistency check fails
     */
    synchronized void setDimensions(int number, String name, int dimensionCount, int dimensionNumBytes) {
      if (dimensionNumBytes > PointValues.MAX_NUM_BYTES) {
        throw new IllegalArgumentException("dimension numBytes must be <= PointValues.MAX_NUM_BYTES (= " + PointValues.MAX_NUM_BYTES + "); got " + dimensionNumBytes + " for field=\"" + name + "\"");
      }
      if (dimensionCount > PointValues.MAX_DIMENSIONS) {
        throw new IllegalArgumentException("pointDimensionCount must be <= PointValues.MAX_DIMENSIONS (= " + PointValues.MAX_DIMENSIONS + "); got " + dimensionCount + " for field=\"" + name + "\"");
      }
      verifyConsistentDimensions(number, name, dimensionCount, dimensionNumBytes);
      dimensions.put(name, new FieldDimensions(dimensionCount, dimensionNumBytes));
    }
  }

  /**
   * Accumulates {@link FieldInfo}s by name, drawing field numbers from a
   * {@link FieldNumbers} instance, and produces a {@link FieldInfos} via {@link #finish()}.
   */
  static final class Builder {
    private final Map<String,FieldInfo> byName = new HashMap<>();
    final FieldNumbers globalFieldNumbers;

    Builder() {
      this(new FieldNumbers());
    }

    /**
     * Creates a new instance with the given {@link FieldNumbers}.
     */
    Builder(FieldNumbers globalFieldNumbers) {
      assert globalFieldNumbers != null;
      this.globalFieldNumbers = globalFieldNumbers;
    }

    /** Adds (or updates from) every field info of {@code other}. */
    public void add(FieldInfos other) {
      for (FieldInfo fieldInfo : other) {
        add(fieldInfo);
      }
    }

    /** Create a new field, or return existing one. */
    public FieldInfo getOrAdd(String name) {
      FieldInfo fi = fieldInfo(name);
      if (fi == null) {
        // This field wasn't yet added to this in-RAM
        // segment's FieldInfo, so now we get a global
        // number for this field.  If the field was seen
        // before then we'll get the same name and number,
        // else we'll allocate a new one:
        final int fieldNumber = globalFieldNumbers.addOrGet(name, -1, DocValuesType.NONE, 0, 0);
        fi = new FieldInfo(name, fieldNumber, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, new HashMap<>(), 0, 0);
        assert !byName.containsKey(fi.name);
        globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, DocValuesType.NONE);
        byName.put(fi.name, fi);
      }

      return fi;
    }

    /**
     * Creates the field if this builder has not seen {@code name} yet, otherwise
     * updates the existing {@link FieldInfo} with the given settings.
     *
     * @throws NullPointerException if {@code docValues} is null
     */
    private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber,
                                          boolean storeTermVector,
                                          boolean omitNorms, boolean storePayloads, IndexOptions indexOptions,
                                          DocValuesType docValues,
                                          int dimensionCount, int dimensionNumBytes) {
      if (docValues == null) {
        throw new NullPointerException("DocValuesType must not be null");
      }
      FieldInfo fi = fieldInfo(name);
      if (fi == null) {
        // This field wasn't yet added to this in-RAM
        // segment's FieldInfo, so now we get a global
        // number for this field.  If the field was seen
        // before then we'll get the same name and number,
        // else we'll allocate a new one:
        final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues, dimensionCount, dimensionNumBytes);
        fi = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, -1, new HashMap<>(), dimensionCount, dimensionNumBytes);
        assert !byName.containsKey(fi.name);
        globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType());
        byName.put(fi.name, fi);
      } else {
        fi.update(storeTermVector, omitNorms, storePayloads, indexOptions, dimensionCount, dimensionNumBytes);

        if (docValues != DocValuesType.NONE) {
          // Only pay the synchronization cost if fi does not already have a DVType
          boolean updateGlobal = fi.getDocValuesType() == DocValuesType.NONE;
          if (updateGlobal) {
            // Must also update docValuesType map so it's
            // aware of this field's DocValuesType.  This will throw IllegalArgumentException if
            // an illegal type change was attempted.
            globalFieldNumbers.setDocValuesType(fi.number, name, docValues);
          }

          fi.setDocValuesType(docValues); // this will also perform the consistency check.
        }
      }
      return fi;
    }

    /** Adds {@code fi}'s settings under its name, preferring {@code fi.number} as the field number. */
    public FieldInfo add(FieldInfo fi) {
      // IMPORTANT - reuse the field number if possible for consistent field numbers across segments
      return addOrUpdateInternal(fi.name, fi.number, fi.hasVectors(),
                                 fi.omitsNorms(), fi.hasPayloads(),
                                 fi.getIndexOptions(), fi.getDocValuesType(),
                                 fi.getPointDimensionCount(), fi.getPointNumBytes());
    }

    /** Returns the field info added to this builder under {@code fieldName}, or null if there is none. */
    public FieldInfo fieldInfo(String fieldName) {
      return byName.get(fieldName);
    }

    /** Builds a {@link FieldInfos} from all field infos added to this builder so far. */
    FieldInfos finish() {
      return new FieldInfos(byName.values().toArray(new FieldInfo[byName.size()]));
    }
  }
}