package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import java.util.LinkedList;
import java.util.HashSet;
import java.io.IOException;
/**
* Combines multiple files into a single compound file.
* The file format:<br>
* <ul>
* <li>VInt format version (written first, before the file count)</li>
* <li>VInt fileCount</li>
* <li>{Directory}
* fileCount entries with the following structure:
* <ul>
* <li>long dataOffset</li>
* <li>String fileName</li>
* </ul>
* </li>
* <li>{File Data}
* fileCount entries with the raw data of the corresponding file</li>
* </ul>
*
* The file starts with the format version. The fileCount integer that follows
* indicates how many files are contained in this compound
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name (in the current format the name is written with the
* segment name stripped).
*
* @lucene.internal
*/
public final class CompoundFileWriter {

  /** Bookkeeping for a single file that is going to be copied into the compound file. */
  private static final class FileEntry {
    /** source file */
    String file;

    /** temporary holder for the start of directory entry for this file */
    long directoryOffset;

    /** temporary holder for the start of this file's data section */
    long dataOffset;

    /** the directory which contains the file. */
    Directory dir;
  }

  // Before versioning started.
  static final int FORMAT_PRE_VERSION = 0;

  // Segment name is not written in the file names.
  static final int FORMAT_NO_SEGMENT_PREFIX = -1;

  // NOTE: if you introduce a new format, make it 1 lower
  // than the current one, and always change this if you
  // switch to a new format!
  static final int FORMAT_CURRENT = FORMAT_NO_SEGMENT_PREFIX;

  /** Directory in which the compound file is created. */
  private final Directory directory;

  /** Full name of the compound file. */
  private final String fileName;

  /** Names of the files added so far; used to reject duplicates. */
  private final HashSet<String> ids;

  /** Files to merge, in the order in which they were added. */
  private final LinkedList<FileEntry> entries;

  /** Optional progress callback invoked after each file is copied; may be null. */
  private final SegmentMerger.CheckAbort checkAbort;

  /** Set once {@link #close()} has been entered; no files can be added afterwards. */
  private boolean merged = false;

  /** Create the compound stream in the specified file. The file name is the
   *  entire name (no extensions are added).
   *  @throws NullPointerException if <code>dir</code> or <code>name</code> is null
   */
  public CompoundFileWriter(Directory dir, String name) {
    this(dir, name, null);
  }

  /** Same as {@link #CompoundFileWriter(Directory, String)}, with an optional
   *  <code>checkAbort</code> that is notified of the number of bytes copied
   *  after each file has been written.
   *  @throws NullPointerException if <code>dir</code> or <code>name</code> is null
   */
  CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
    if (dir == null) {
      throw new NullPointerException("directory cannot be null");
    }
    if (name == null) {
      throw new NullPointerException("name cannot be null");
    }
    this.checkAbort = checkAbort;
    directory = dir;
    fileName = name;
    ids = new HashSet<String>();
    entries = new LinkedList<FileEntry>();
  }

  /** Returns the directory of the compound file. */
  public Directory getDirectory() {
    return directory;
  }

  /** Returns the name of the compound file. */
  public String getName() {
    return fileName;
  }

  /** Add a source stream. <code>file</code> is the string by which the
   *  sub-stream will be known in the compound stream. The file is read from
   *  the same directory the compound file is written to.
   *
   *  @throws IllegalStateException if this writer is closed
   *  @throws NullPointerException if <code>file</code> is null
   *  @throws IllegalArgumentException if a file with the same name
   *   has been added already
   */
  public void addFile(String file) {
    addFile(file, directory);
  }

  /**
   * Same as {@link #addFile(String)}, only for files that are found in an
   * external {@link Directory}.
   *
   * @throws IllegalStateException if this writer is closed
   * @throws NullPointerException if <code>file</code> or <code>dir</code> is null
   * @throws IllegalArgumentException if a file with the same name
   *  has been added already
   */
  public void addFile(String file, Directory dir) {
    if (merged) {
      throw new IllegalStateException(
          "Can't add extensions after merge has been called");
    }
    if (file == null) {
      throw new NullPointerException(
          "file cannot be null");
    }
    // Reject a null directory here, before the name is recorded: otherwise the
    // failure would only show up inside close(), after the output file has
    // already been created.
    if (dir == null) {
      throw new NullPointerException("directory cannot be null");
    }
    if (!ids.add(file)) {
      throw new IllegalArgumentException(
          "File " + file + " already added");
    }

    FileEntry entry = new FileEntry();
    entry.file = file;
    entry.dir = dir;
    entries.add(entry);
  }

  /** Merge files with the extensions added up to now.
   *  All files with these extensions are combined sequentially into the
   *  compound stream.
   *  <p>The writer is marked as merged before any I/O is attempted, so a
   *  second call always fails with an IllegalStateException, even if the
   *  first call threw.
   *  @throws IllegalStateException if close() had been called before or
   *   if no file has been added to this object
   *  @throws IOException if writing the compound file or reading one of the
   *   source files fails
   */
  public void close() throws IOException {
    if (merged) {
      throw new IllegalStateException("Merge already performed");
    }
    if (entries.isEmpty()) {
      throw new IllegalStateException("No entries to merge have been defined");
    }
    merged = true;

    // open the compound stream
    IndexOutput os = directory.createOutput(fileName);
    IOException priorException = null;
    try {
      // Write the Version info - must be a VInt because CFR reads a VInt
      // in older versions!
      os.writeVInt(FORMAT_CURRENT);

      // Write the number of entries
      os.writeVInt(entries.size());

      // Write the directory with all offsets at 0.
      // Remember the positions of directory entries so that we can
      // adjust the offsets later
      long totalSize = 0;
      for (FileEntry fe : entries) {
        fe.directoryOffset = os.getFilePointer();
        os.writeLong(0);    // for now
        os.writeString(IndexFileNames.stripSegmentName(fe.file));
        totalSize += fe.dir.fileLength(fe.file);
      }

      // Pre-allocate size of file as optimization --
      // this can potentially help IO performance as
      // we write the file and also later during
      // searching.  It also uncovers a disk-full
      // situation earlier and hopefully without
      // actually filling disk to 100%:
      final long finalLength = totalSize + os.getFilePointer();
      os.setLength(finalLength);

      // Open the files and copy their data into the stream.
      // Remember the locations of each file's data section.
      for (FileEntry fe : entries) {
        fe.dataOffset = os.getFilePointer();
        copyFile(fe, os);
      }

      // Write the data offsets into the directory of the compound stream
      for (FileEntry fe : entries) {
        os.seek(fe.directoryOffset);
        os.writeLong(fe.dataOffset);
      }

      assert finalLength == os.length();

      // Close the output stream. Set the os to null before trying to
      // close so that if an exception occurs during the close, the
      // finally clause below will not attempt to close the stream
      // the second time.
      IndexOutput tmp = os;
      os = null;
      tmp.close();
    } catch (IOException e) {
      priorException = e;
    } finally {
      // NOTE(review): relies on IOUtils.closeWhileHandlingException closing the
      // still-open output (if any) and rethrowing priorException when it is
      // non-null -- confirm against IOUtils.
      IOUtils.closeWhileHandlingException(priorException, os);
    }
  }

  /**
   * Copy the complete contents of the file described by <code>source</code>
   * into the provided output stream, starting at the stream's current position.
   *
   * @throws IOException if reading or writing fails, or if the number of bytes
   *  the output advanced by differs from the length of the source file
   */
  private void copyFile(FileEntry source, IndexOutput os) throws IOException {
    IndexInput is = source.dir.openInput(source.file);
    try {
      long startPtr = os.getFilePointer();
      long length = is.length();
      os.copyBytes(is, length);

      // Report the amount of work done to the optional progress callback.
      if (checkAbort != null) {
        checkAbort.work(length);
      }

      // Verify that the output length diff is equal to original file
      long endPtr = os.getFilePointer();
      long diff = endPtr - startPtr;
      if (diff != length) {
        throw new IOException("Difference in the output file offsets " + diff
            + " does not match the original file length " + length);
      }
    } finally {
      is.close();
    }
  }
}