package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.HashMap;
import java.util.ArrayList;

/**
 * Information about a segment such as its name, directory, and files related
 * to the segment.
 *
 * @lucene.experimental
 */
public final class SegmentInfo {

  static final int NO = -1;          // e.g. no norms; no deletes;
  static final int YES = 1;          // e.g. have norms; have deletes;
  static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.

  public String name;                // unique name in dir
  public int docCount;               // number of docs in seg
  public Directory dir;              // where segment resides

  /*
   * Current generation of del file:
   * - NO if there are no deletes
   * - YES or higher if there are deletes at generation N
   */
  private long delGen;

  /*
   * Current generation of each field's norm file.  If this array is null,
   * it means there are no separate norms.
   * If this array is not null, its values mean:
   * - NO says this field has no separate norms
   * - >= YES says this field has separate norms with the specified generation
   */
  private long[] normGen;

  private boolean isCompoundFile;

  private List<String> files;                     // cached list of files that this segment uses
                                                  // in the Directory

  long sizeInBytes = -1;                          // total byte size of all of our files (computed on demand)

  private int docStoreOffset;                     // if this segment shares stored fields & vectors, this
                                                  // offset is where in that file this segment's docs begin
  private String docStoreSegment;                 // name used to derive fields/vectors file we share with
                                                  // other segments
  private boolean docStoreIsCompoundFile;         // whether doc store files are stored in compound file (*.cfx)

  private int delCount;                           // How many deleted docs in this segment

  private boolean hasProx;                        // True if this segment has any fields with omitTermFreqAndPositions==false

  private Codec codec;

  private Map<String,String> diagnostics;

  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile,
                     boolean hasProx, Codec codec) {
    this.name = name;
    this.docCount = docCount;
    this.dir = dir;
    delGen = NO;
    this.isCompoundFile = isCompoundFile;
    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
    this.hasProx = hasProx;
    this.codec = codec;
    delCount = 0;
    assert docStoreOffset == -1 || docStoreSegment != null: "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount;
  }

  /**
   * Copy everything from src SegmentInfo into our instance.
   */
  void reset(SegmentInfo src) {
    clearFiles();
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
    delGen = src.delGen;
    docStoreOffset = src.docStoreOffset;
    docStoreIsCompoundFile = src.docStoreIsCompoundFile;
    if (src.normGen == null) {
      normGen = null;
    } else {
      normGen = new long[src.normGen.length];
      System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
    }
    isCompoundFile = src.isCompoundFile;
    delCount = src.delCount;
    codec = src.codec;
  }

  void setDiagnostics(Map<String, String> diagnostics) {
    this.diagnostics = diagnostics;
  }

  public Map<String, String> getDiagnostics() {
    return diagnostics;
  }

  /**
   * Construct a new SegmentInfo instance by reading a
   * previously saved SegmentInfo from input.
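   *
   * <p>Descriptive note (derived from this class's read/write code): the fields
   * read here mirror the layout produced by {@link #write}: segment name, document
   * count, deletes generation, shared doc store info, per-field norm generations,
   * the compound-file flag, deleted-doc count, hasProx, the codec name for 4.0+
   * formats (older segments fall back to "PreFlex"), and the diagnostics map.</p>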
   *
   * <p>Note: this is public only to allow access from
   * the codecs package.</p>
   *
   * @param dir directory to load from
   * @param format format of the segments info file
   * @param input input handle to read segment info from
   * @param codecs codec provider used to look up this segment's codec by name
   */
  public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
    this.dir = dir;
    name = input.readString();
    docCount = input.readInt();
    final String codecName;
    delGen = input.readLong();
    docStoreOffset = input.readInt();
    if (docStoreOffset != -1) {
      docStoreSegment = input.readString();
      docStoreIsCompoundFile = input.readByte() == YES;
    } else {
      docStoreSegment = name;
      docStoreIsCompoundFile = false;
    }

    if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
      // pre-4.0 indexes write a byte if there is a single norms file
      byte b = input.readByte();
      assert 1 == b;
    }

    int numNormGen = input.readInt();
    if (numNormGen == NO) {
      normGen = null;
    } else {
      normGen = new long[numNormGen];
      for(int j=0;j<numNormGen;j++) {
        normGen[j] = input.readLong();
      }
    }
    isCompoundFile = input.readByte() == YES;

    delCount = input.readInt();
    assert delCount <= docCount;

    hasProx = input.readByte() == YES;

    // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);

    if (format <= DefaultSegmentInfosWriter.FORMAT_4_0)
      codecName = input.readString();
    else
      codecName = "PreFlex";

    diagnostics = input.readStringStringMap();
    codec = codecs.lookup(codecName);
  }

  /** Returns the total size in bytes of all files used by
   *  this segment. */
  public long sizeInBytes() throws IOException {
    if (sizeInBytes == -1) {
      List<String> files = files();
      final int size = files.size();
      sizeInBytes = 0;
      for(int i=0;i<size;i++) {
        final String fileName = files.get(i);
        // We don't count bytes used by a shared doc store
        // against this segment:
        if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))
          sizeInBytes += dir.fileLength(fileName);
      }
    }
    return sizeInBytes;
  }

  public boolean hasDeletions() {
    // Cases:
    //
    //   delGen == NO: this segment does not have deletions yet
    //   delGen >= YES: this segment has deletions
    //
    return delGen != NO;
  }

  void advanceDelGen() {
    if (delGen == NO) {
      delGen = YES;
    } else {
      delGen++;
    }
    clearFiles();
  }

  void clearDelGen() {
    delGen = NO;
    clearFiles();
  }

  @Override
  public Object clone() {
    SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, docStoreOffset,
                                     docStoreSegment, docStoreIsCompoundFile, hasProx, codec);
    si.isCompoundFile = isCompoundFile;
    si.delGen = delGen;
    si.delCount = delCount;
    si.hasProx = hasProx;
    si.diagnostics = new HashMap<String, String>(diagnostics);
    if (normGen != null) {
      si.normGen = normGen.clone();
    }
    si.docStoreOffset = docStoreOffset;
    si.docStoreSegment = docStoreSegment;
    si.docStoreIsCompoundFile = docStoreIsCompoundFile;
    si.codec = codec;
    // Note: the cached file list and sizeInBytes are not copied; the clone
    // recomputes them on demand.
    return si;
  }

  public String getDelFileName() {
    if (delGen == NO) {
      // In this case we know there is no deletion filename
      // against this segment
      return null;
    } else {
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
    }
  }

  /**
   * Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
   *
   * @param fieldNumber the field index to check
   */
  public boolean hasSeparateNorms(int fieldNumber) {
    return normGen != null && normGen[fieldNumber] != NO;
  }

  /**
   * Returns true if any fields in this segment have separate norms.
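   * A field gains separate norms once {@link #advanceNormGen} has been called
   * for it, which moves its normGen entry to YES or higher.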
   */
  public boolean hasSeparateNorms() {
    if (normGen == null) {
      return false;
    } else {
      for (long fieldNormGen : normGen) {
        if (fieldNormGen >= YES) {
          return true;
        }
      }
    }
    return false;
  }

  void initNormGen(int numFields) {
    if (normGen == null) { // normGen is null if this segment hasn't had any norms set against it yet
      normGen = new long[numFields];
      Arrays.fill(normGen, NO);
    }
  }

  /**
   * Increment the generation count for the norms file for
   * this field.
   *
   * @param fieldIndex field whose norm file will be rewritten
   */
  void advanceNormGen(int fieldIndex) {
    if (normGen[fieldIndex] == NO) {
      normGen[fieldIndex] = YES;
    } else {
      normGen[fieldIndex]++;
    }
    clearFiles();
  }

  /**
   * Get the file name for the norms file for this field.
   *
   * @param number field index
   */
  public String getNormFileName(int number) {
    if (hasSeparateNorms(number)) {
      return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen[number]);
    } else {
      // single file for all norms
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
    }
  }

  /**
   * Mark whether this segment is stored as a compound file.
   *
   * @param isCompoundFile true if this is a compound file;
   *                       else, false
   */
  void setUseCompoundFile(boolean isCompoundFile) {
    this.isCompoundFile = isCompoundFile;
    clearFiles();
  }

  /**
   * Returns true if this segment is stored as a compound
   * file; else, false.
   */
  public boolean getUseCompoundFile() {
    return isCompoundFile;
  }

  public int getDelCount() {
    return delCount;
  }

  void setDelCount(int delCount) {
    this.delCount = delCount;
    assert delCount <= docCount;
  }

  public int getDocStoreOffset() {
    return docStoreOffset;
  }

  public boolean getDocStoreIsCompoundFile() {
    return docStoreIsCompoundFile;
  }

  void setDocStoreIsCompoundFile(boolean v) {
    docStoreIsCompoundFile = v;
    clearFiles();
  }

  public String getDocStoreSegment() {
    return docStoreSegment;
  }

  void setDocStoreOffset(int offset) {
    docStoreOffset = offset;
    clearFiles();
  }

  void setDocStore(int offset, String segment, boolean isCompoundFile) {
    docStoreOffset = offset;
    docStoreSegment = segment;
    docStoreIsCompoundFile = isCompoundFile;
    clearFiles();
  }

  /** Save this segment's info. */
  public void write(IndexOutput output) throws IOException {
    assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
    output.writeString(name);
    output.writeInt(docCount);
    output.writeLong(delGen);
    output.writeInt(docStoreOffset);
    if (docStoreOffset != -1) {
      output.writeString(docStoreSegment);
      output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
    }

    if (normGen == null) {
      output.writeInt(NO);
    } else {
      output.writeInt(normGen.length);
      for (long fieldNormGen : normGen) {
        output.writeLong(fieldNormGen);
      }
    }

    output.writeByte((byte) (isCompoundFile ? YES : NO));
    output.writeInt(delCount);
    output.writeByte((byte) (hasProx ? 1:0));
    output.writeString(codec.name);
    output.writeStringStringMap(diagnostics);
  }

  void setHasProx(boolean hasProx) {
    this.hasProx = hasProx;
    clearFiles();
  }

  public boolean getHasProx() {
    return hasProx;
  }

  /** Can only be called once. */
  public void setCodec(Codec codec) {
    assert this.codec == null;
    if (codec == null) {
      throw new IllegalArgumentException("codec must be non-null");
    }
    this.codec = codec;
  }

  Codec getCodec() {
    return codec;
  }

  private void addIfExists(Set<String> files, String fileName) throws IOException {
    if (dir.fileExists(fileName))
      files.add(fileName);
  }

  /*
   * Return all files referenced by this SegmentInfo.  The
   * returned List is a locally cached List so you should not
   * modify it.
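   *
   * The set is built from the compound file (or the individual per-extension
   * files plus any codec-specific files), the shared or private doc store
   * files, the deletes file for the current generation, and any separate
   * norms files.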
   */
  public List<String> files() throws IOException {

    if (files != null) {
      // Already cached:
      return files;
    }

    Set<String> fileSet = new HashSet<String>();

    boolean useCompoundFile = getUseCompoundFile();

    if (useCompoundFile) {
      fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
    } else {
      for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) {
        addIfExists(fileSet, IndexFileNames.segmentFileName(name, "", ext));
      }
      codec.files(dir, this, fileSet);
    }

    if (docStoreOffset != -1) {
      // We are sharing doc stores (stored fields, term
      // vectors) with other segments
      assert docStoreSegment != null;
      if (docStoreIsCompoundFile) {
        fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
      } else {
        for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS)
          addIfExists(fileSet, IndexFileNames.segmentFileName(docStoreSegment, "", ext));
      }
    } else if (!useCompoundFile) {
      for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS)
        addIfExists(fileSet, IndexFileNames.segmentFileName(name, "", ext));
    }

    String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
      fileSet.add(delFileName);
    }

    if (normGen != null) {
      for (int i = 0; i < normGen.length; i++) {
        long gen = normGen[i];
        if (gen >= YES) {
          // Definitely a separate norm file, with generation:
          fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
        }
      }
    }

    files = new ArrayList<String>(fileSet);

    return files;
  }

  /* Called whenever any change is made that affects which
   * files this segment has. */
  private void clearFiles() {
    files = null;
    sizeInBytes = -1;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    return toString(dir, 0);
  }

  /** Used for debugging.  Format may suddenly change.
   *
   *  <p>Current format looks like
   *  <code>_a:c45/4->_1</code>, which means the segment's
   *  name is <code>_a</code>; it's using compound file
   *  format (would be <code>C</code> if not compound); it
   *  has 45 documents; it has 4 deletions (this part is
   *  left off when there are no deletions); it's using the
   *  shared doc stores named <code>_1</code> (this part is
   *  left off if doc stores are private).</p>
   */
  public String toString(Directory dir, int pendingDelCount) {

    StringBuilder s = new StringBuilder();
    s.append(name).append(':');

    char cfs = getUseCompoundFile() ? 'c' : 'C';
    s.append(cfs);

    if (this.dir != dir) {
      s.append('x');
    }
    s.append(docCount);

    int delCount = getDelCount() + pendingDelCount;
    if (delCount != 0) {
      s.append('/').append(delCount);
    }

    if (docStoreOffset != -1) {
      s.append("->").append(docStoreSegment);
    }

    return s.toString();
  }

  /** We consider another SegmentInfo instance equal if it
   *  has the same dir and same name. */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (obj instanceof SegmentInfo) {
      final SegmentInfo other = (SegmentInfo) obj;
      return other.dir == dir && other.name.equals(name);
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    return dir.hashCode() + name.hashCode();
  }
}