package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.HashMap;
import java.util.ArrayList;
/**
* Information about a segment such as its name, directory, and files related
* to the segment.
*
* @lucene.experimental
*/
public final class SegmentInfo {

  // Sentinel values stored in delGen and normGen[] (and used when composing
  // generation-bearing file names).  Note these are written verbatim into the
  // segments file by write(), so they are part of the index format.
  static final int NO = -1;          // e.g. no norms; no deletes;
  static final int YES = 1;          // e.g. have norms; have deletes;
  static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.

  public String name;    // unique name in dir
  public int docCount;   // number of docs in seg
  public Directory dir;  // where segment resides

  /*
   * Current generation of del file:
   * - NO if there are no deletes
   * - YES or higher if there are deletes at generation N
   */
  private long delGen;

  /*
   * Current generation of each field's norm file. If this array is null,
   * means no separate norms. If this array is not null, its values mean:
   * - NO says this field has no separate norms
   * >= YES says this field has separate norms with the specified generation
   */
  private long[] normGen;

  private boolean isCompoundFile;   // whether the non-store files live in a single *.cfs file

  private List<String> files;       // cached list of files that this segment uses
                                    // in the Directory; invalidated by clearFiles()

  long sizeInBytes = -1;            // total byte size of all of our files (computed on demand)

  private int docStoreOffset;       // if this segment shares stored fields & vectors, this
                                    // offset is where in that file this segment's docs begin;
                                    // -1 means the doc store is private to this segment

  private String docStoreSegment;   // name used to derive fields/vectors file we share with
                                    // other segments

  private boolean docStoreIsCompoundFile;  // whether doc store files are stored in compound file (*.cfx)

  private int delCount;             // How many deleted docs in this segment

  private boolean hasProx;          // True if this segment has any fields with omitTermFreqAndPositions==false

  private Codec codec;              // codec used to read/write this segment's postings

  private Map<String,String> diagnostics;  // free-form key/value debug info recorded at write time

  /**
   * Creates a new in-memory SegmentInfo (used when a segment is first
   * flushed/merged, before it has been recorded in the segments file).
   *
   * @param name unique segment name within {@code dir}
   * @param docCount number of documents in the segment
   * @param dir directory the segment's files live in
   * @param isCompoundFile true if the segment's non-store files are packed in a compound file
   * @param docStoreOffset offset of this segment's docs within a shared doc store, or -1 if not shared
   * @param docStoreSegment segment name the shared doc store files derive from (must be
   *        non-null whenever docStoreOffset != -1)
   * @param docStoreIsCompoundFile true if the shared doc store is itself a compound file (*.cfx)
   * @param hasProx true if any field indexes positions/frequencies
   * @param codec codec used for this segment's postings
   */
  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, int docStoreOffset,
                     String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) {
    this.name = name;
    this.docCount = docCount;
    this.dir = dir;
    delGen = NO;   // a fresh segment has no deletions yet
    this.isCompoundFile = isCompoundFile;
    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
    this.hasProx = hasProx;
    this.codec = codec;
    delCount = 0;
    // Sharing a doc store requires knowing which segment name it derives from:
    assert docStoreOffset == -1 || docStoreSegment != null: "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount;
  }

  /**
   * Copy everything from src SegmentInfo into our instance.
   */
  // NOTE(review): this copies docStoreOffset/docStoreIsCompoundFile but NOT
  // docStoreSegment, hasProx, or diagnostics -- confirm callers only reset
  // between instances that already agree on those fields.
  void reset(SegmentInfo src) {
    clearFiles();  // cached file list / size no longer valid for the new state
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
    delGen = src.delGen;
    docStoreOffset = src.docStoreOffset;
    docStoreIsCompoundFile = src.docStoreIsCompoundFile;
    if (src.normGen == null) {
      normGen = null;
    } else {
      // Deep-copy so later advanceNormGen() on one instance doesn't affect the other:
      normGen = new long[src.normGen.length];
      System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
    }
    isCompoundFile = src.isCompoundFile;
    delCount = src.delCount;
    codec = src.codec;
  }

  /** Sets the diagnostics map recorded when this segment was written (not copied). */
  void setDiagnostics(Map<String, String> diagnostics) {
    this.diagnostics = diagnostics;
  }

  /** Returns the diagnostics map recorded when this segment was written. */
  public Map<String, String> getDiagnostics() {
    return diagnostics;
  }

  /**
   * Construct a new SegmentInfo instance by reading a
   * previously saved SegmentInfo from input.
   * <p>Note: this is public only to allow access from
   * the codecs package.</p>
   *
   * @param dir directory to load from
   * @param format format of the segments info file
   * @param input input handle to read segment info from
   */
  // NOTE(review): the read order here must mirror write() exactly; keep the
  // two methods in sync.  Format comparisons assume segments-file format
  // constants decrease as the format evolves (newer == smaller) -- confirm
  // against DefaultSegmentInfosWriter.
  public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
    this.dir = dir;
    name = input.readString();
    docCount = input.readInt();
    final String codecName;
    delGen = input.readLong();
    docStoreOffset = input.readInt();
    if (docStoreOffset != -1) {
      // Doc store is shared; its segment name and compound-ness follow:
      docStoreSegment = input.readString();
      docStoreIsCompoundFile = input.readByte() == YES;
    } else {
      // Private doc store: derived from this segment's own name.
      docStoreSegment = name;
      docStoreIsCompoundFile = false;
    }
    if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
      // pre-4.0 indexes write a byte if there is a single norms file
      byte b = input.readByte();
      assert 1 == b;
    }
    int numNormGen = input.readInt();
    if (numNormGen == NO) {
      // No per-field norm generations were recorded:
      normGen = null;
    } else {
      normGen = new long[numNormGen];
      for(int j=0;j<numNormGen;j++) {
        normGen[j] = input.readLong();
      }
    }
    isCompoundFile = input.readByte() == YES;
    delCount = input.readInt();
    assert delCount <= docCount;
    hasProx = input.readByte() == YES;
    // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
    if (format <= DefaultSegmentInfosWriter.FORMAT_4_0)
      codecName = input.readString();
    else
      // Older formats did not record a codec name; assume the pre-4.0 codec:
      codecName = "PreFlex";
    diagnostics = input.readStringStringMap();
    codec = codecs.lookup(codecName);
  }

  /** Returns total size in bytes of all of files used by
   *  this segment.  The result is cached in {@link #sizeInBytes} and
   *  invalidated by clearFiles(). */
  public long sizeInBytes() throws IOException {
    if (sizeInBytes == -1) {
      List<String> files = files();
      final int size = files.size();
      sizeInBytes = 0;
      for(int i=0;i<size;i++) {
        final String fileName = files.get(i);
        // We don't count bytes used by a shared doc store
        // against this segment:
        if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))
          sizeInBytes += dir.fileLength(fileName);
      }
    }
    return sizeInBytes;
  }

  /** Returns true if this segment has deleted docs (i.e. a live del file). */
  public boolean hasDeletions() {
    // Cases:
    //
    // delGen == NO: this means this segment does not have deletions yet
    // delGen >= YES: this means this segment has deletions
    //
    return delGen != NO;
  }

  /** Bumps the deletions generation so the next del file gets a new name. */
  void advanceDelGen() {
    if (delGen == NO) {
      delGen = YES;  // first deletion: start at generation 1
    } else {
      delGen++;
    }
    clearFiles();  // file list now includes a differently-named del file
  }

  /** Discards all deletions: resets the del generation to "none". */
  void clearDelGen() {
    delGen = NO;
    clearFiles();
  }

  /**
   * Returns a deep-enough copy: normGen is cloned and diagnostics copied into
   * a new map; the cached files list and sizeInBytes are deliberately NOT
   * copied (the clone recomputes them on demand).
   */
  // NOTE(review): does not call super.clone(); relies on the public
  // constructor instead, which is safe only because this class is final.
  @Override
  public Object clone() {
    SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, hasProx, codec);
    si.isCompoundFile = isCompoundFile;  // redundant with the ctor arg, kept for safety
    si.delGen = delGen;
    si.delCount = delCount;
    si.hasProx = hasProx;
    si.diagnostics = new HashMap<String, String>(diagnostics);
    if (normGen != null) {
      si.normGen = normGen.clone();
    }
    si.docStoreOffset = docStoreOffset;
    si.docStoreSegment = docStoreSegment;
    si.docStoreIsCompoundFile = docStoreIsCompoundFile;
    si.codec = codec;
    return si;
  }

  /** Returns the current del file name, or null if this segment has no deletions. */
  public String getDelFileName() {
    if (delGen == NO) {
      // In this case we know there is no deletion filename
      // against this segment
      return null;
    } else {
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
    }
  }

  /**
   * Returns true if this field for this segment has saved a separate norms file (_&lt;segment&gt;_N.sX).
   *
   * @param fieldNumber the field index to check
   */
  public boolean hasSeparateNorms(int fieldNumber) {
    return normGen != null && normGen[fieldNumber] != NO;
  }

  /**
   * Returns true if any fields in this segment have separate norms.
   */
  public boolean hasSeparateNorms() {
    if (normGen == null) {
      return false;
    } else {
      // Any entry >= YES means that field carries a generational norms file:
      for (long fieldNormGen : normGen) {
        if (fieldNormGen >= YES) {
          return true;
        }
      }
    }
    return false;
  }

  /** Lazily allocates normGen, initializing every field to "no separate norms". */
  void initNormGen(int numFields) {
    if (normGen == null) { // normGen is null if this segments file hasn't had any norms set against it yet
      normGen = new long[numFields];
      Arrays.fill(normGen, NO);
    }
  }

  /**
   * Increment the generation count for the norms file for
   * this field.
   *
   * @param fieldIndex field whose norm file will be rewritten
   */
  void advanceNormGen(int fieldIndex) {
    if (fieldIndex >= 0 && normGen[fieldIndex] == NO) {
      normGen[fieldIndex] = YES;  // first separate norms for this field
    } else {
      normGen[fieldIndex]++;
    }
    clearFiles();  // a new norms file name is now part of this segment
  }

  /**
   * Get the file name for the norms file for this field.
   *
   * @param number field index
   */
  public String getNormFileName(int number) {
    if (hasSeparateNorms(number)) {
      // Per-field generational norms file, e.g. _a_3.s7:
      return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen[number]);
    } else {
      // single file for all norms
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
    }
  }

  /**
   * Mark whether this segment is stored as a compound file.
   *
   * @param isCompoundFile true if this is a compound file;
   * else, false
   */
  void setUseCompoundFile(boolean isCompoundFile) {
    this.isCompoundFile = isCompoundFile;
    clearFiles();  // compound vs. loose files changes the file list entirely
  }

  /**
   * Returns true if this segment is stored as a compound
   * file; else, false.
   */
  public boolean getUseCompoundFile() {
    return isCompoundFile;
  }

  /** Returns the number of deleted docs in this segment. */
  public int getDelCount() {
    return delCount;
  }

  /** Sets the number of deleted docs; must not exceed docCount. */
  void setDelCount(int delCount) {
    this.delCount = delCount;
    assert delCount <= docCount;
  }

  /** Returns this segment's offset into its shared doc store, or -1 if unshared. */
  public int getDocStoreOffset() {
    return docStoreOffset;
  }

  /** Returns true if the doc store files are packed in a compound file (*.cfx). */
  public boolean getDocStoreIsCompoundFile() {
    return docStoreIsCompoundFile;
  }

  /** Sets whether the doc store is a compound file and invalidates the cached file list. */
  void setDocStoreIsCompoundFile(boolean v) {
    docStoreIsCompoundFile = v;
    clearFiles();
  }

  /** Returns the segment name the doc store files derive from. */
  public String getDocStoreSegment() {
    return docStoreSegment;
  }

  /** Sets this segment's offset into its shared doc store. */
  void setDocStoreOffset(int offset) {
    docStoreOffset = offset;
    clearFiles();
  }

  /** Atomically points this segment at a (possibly shared) doc store. */
  void setDocStore(int offset, String segment, boolean isCompoundFile) {
    docStoreOffset = offset;
    docStoreSegment = segment;
    docStoreIsCompoundFile = isCompoundFile;
    clearFiles();
  }

  /** Save this segment's info. */
  // NOTE(review): the write order here must mirror the reading constructor
  // exactly; keep the two in sync.  Unlike the reader, no pre-4.0 norms byte
  // is written -- this path always emits the current format.
  public void write(IndexOutput output)
    throws IOException {
    assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
    output.writeString(name);
    output.writeInt(docCount);
    output.writeLong(delGen);
    output.writeInt(docStoreOffset);
    if (docStoreOffset != -1) {
      // Only shared doc stores record their segment name and compound-ness:
      output.writeString(docStoreSegment);
      output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
    }

    if (normGen == null) {
      output.writeInt(NO);  // sentinel: no per-field norm generations
    } else {
      output.writeInt(normGen.length);
      for (long fieldNormGen : normGen) {
        output.writeLong(fieldNormGen);
      }
    }

    output.writeByte((byte) (isCompoundFile ? YES : NO));
    output.writeInt(delCount);
    output.writeByte((byte) (hasProx ? 1:0));
    output.writeString(codec.name);
    output.writeStringStringMap(diagnostics);
  }

  /** Sets whether any field in this segment indexes positions/frequencies. */
  void setHasProx(boolean hasProx) {
    this.hasProx = hasProx;
    clearFiles();
  }

  /** Returns true if any field in this segment indexes positions/frequencies. */
  public boolean getHasProx() {
    return hasProx;
  }

  /** Can only be called once. */
  public void setCodec(Codec codec) {
    assert this.codec == null;  // enforce the call-once contract (when -ea)
    if (codec == null) {
      throw new IllegalArgumentException("codec must be non-null");
    }
    this.codec = codec;
  }

  /** Returns the codec used to read/write this segment's postings. */
  Codec getCodec() {
    return codec;
  }

  /** Adds fileName to files only if it actually exists in this segment's directory. */
  private void addIfExists(Set<String> files, String fileName) throws IOException {
    if (dir.fileExists(fileName))
      files.add(fileName);
  }

  /*
   * Return all files referenced by this SegmentInfo.  The
   * returns List is a locally cached List so you should not
   * modify it.
   */
  public List<String> files() throws IOException {

    if (files != null) {
      // Already cached:
      return files;
    }

    Set<String> fileSet = new HashSet<String>();

    boolean useCompoundFile = getUseCompoundFile();

    if (useCompoundFile) {
      // All non-store files are inside the single .cfs file:
      fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
    } else {
      // Loose files: probe each known extension, then ask the codec for its own files.
      for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) {
        addIfExists(fileSet, IndexFileNames.segmentFileName(name, "", ext));
      }
      codec.files(dir, this, fileSet);
    }

    if (docStoreOffset != -1) {
      // We are sharing doc stores (stored fields, term
      // vectors) with other segments
      assert docStoreSegment != null;
      if (docStoreIsCompoundFile) {
        fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
      } else {
        for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS)
          addIfExists(fileSet, IndexFileNames.segmentFileName(docStoreSegment, "", ext));
      }
    } else if (!useCompoundFile) {
      // Private doc store, loose files: store files derive from our own name.
      for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS)
        addIfExists(fileSet, IndexFileNames.segmentFileName(name, "", ext));
    }

    // Deletions file, if any (probe the directory only when the generation is unknown):
    String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
      fileSet.add(delFileName);
    }

    if (normGen != null) {
      for (int i = 0; i < normGen.length; i++) {
        long gen = normGen[i];
        if (gen >= YES) {
          // Definitely a separate norm file, with generation:
          fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
        }
      }
    }

    files = new ArrayList<String>(fileSet);

    return files;
  }

  /* Called whenever any change is made that affects which
   * files this segment has. */
  private void clearFiles() {
    files = null;
    sizeInBytes = -1;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    return toString(dir, 0);
  }

  /** Used for debugging.  Format may suddenly change.
   *
   *  <p>Current format looks like
   *  <code>_a:c45/4-&gt;_1</code>, which means the segment's
   *  name is <code>_a</code>; it's using compound file
   *  format (would be <code>C</code> if not compound); it
   *  has 45 documents; it has 4 deletions (this part is
   *  left off when there are no deletions); it's using the
   *  shared doc stores named <code>_1</code> (this part is
   *  left off if doc stores are private).</p>
   */
  public String toString(Directory dir, int pendingDelCount) {

    StringBuilder s = new StringBuilder();
    s.append(name).append(':');

    char cfs = getUseCompoundFile() ? 'c' : 'C';
    s.append(cfs);

    if (this.dir != dir) {
      // Segment lives in a different directory than the one being printed:
      s.append('x');
    }
    s.append(docCount);

    int delCount = getDelCount() + pendingDelCount;
    if (delCount != 0) {
      s.append('/').append(delCount);
    }

    if (docStoreOffset != -1) {
      s.append("->").append(docStoreSegment);
    }

    return s.toString();
  }

  /** We consider another SegmentInfo instance equal if it
   *  has the same dir and same name. */
  // Note: dir is compared by identity (==), name by value; hashCode below
  // is consistent with this.
  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (obj instanceof SegmentInfo) {
      final SegmentInfo other = (SegmentInfo) obj;
      return other.dir == dir && other.name.equals(name);
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    return dir.hashCode() + name.hashCode();
  }
}