package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.StringHelper; import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; import java.text.NumberFormat; import java.io.PrintStream; import java.io.IOException; import java.io.File; import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.ArrayList; import java.util.Map; /** * Basic tool and API to check the health of an index and * write a new segments file that removes reference to * problematic segments. * * <p>As this tool checks every byte in the index, on a large * index it can take quite a long time to run. * * @lucene.experimental Please make a complete backup of your * index before using this to fix your index! */ public class CheckIndex { private PrintStream infoStream; private Directory dir; /** * Returned from {@link #checkIndex()} detailing the health and status of the index. 
 * @lucene.experimental */
  public static class Status {

    /** True if no problems were found with the index. */
    public boolean clean;

    /** True if we were unable to locate and load the segments_N file. */
    public boolean missingSegments;

    /** True if we were unable to open the segments_N file. */
    public boolean cantOpenSegments;

    /** True if we were unable to read the version number from segments_N file. */
    public boolean missingSegmentVersion;

    /** Name of latest segments_N file in the index. */
    public String segmentsFileName;

    /** Number of segments in the index. */
    public int numSegments;

    /** String description of the version of the index. */
    public String segmentFormat;

    /** Empty unless you passed specific segments list to check as optional 3rd argument.
     *  @see CheckIndex#checkIndex(List) */
    public List<String> segmentsChecked = new ArrayList<String>();

    /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
    public boolean toolOutOfDate;

    /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
    public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();

    /** Directory index is in. */
    public Directory dir;

    /** SegmentInfos instance containing only segments that
     *  had no problems (this is used with the {@link CheckIndex#fixIndex}
     *  method to repair the index). */
    SegmentInfos newSegments;

    /** How many documents will be lost to bad segments. */
    public int totLoseDocCount;

    /** How many bad segments were found. */
    public int numBadSegments;

    /** True if we checked only specific segments ({@link
     *  #checkIndex(List)} was called with non-null argument). */
    public boolean partial;

    /** The greatest segment name (segment names are base-36 ints). */
    public int maxSegmentName;

    /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
    public boolean validCounter;

    /** Holds the userData of the last commit in the index. */
    public Map<String, String> userData;

    /** Holds the status of each segment in the index.
     *  See {@link #segmentInfos}.
     *
     *  <p><b>WARNING</b>: this API is new and experimental and is
     *  subject to suddenly change in the next release. */
    public static class SegmentInfoStatus {

      /** Name of the segment. */
      public String name;

      /** Document count (does not take deletions into account). */
      public int docCount;

      /** True if segment is compound file format. */
      public boolean compound;

      /** Number of files referenced by this segment. */
      public int numFiles;

      /** Net size (MB) of the files referenced by this segment. */
      public double sizeMB;

      /** Doc store offset, if this segment shares the doc
       *  store files (stored fields and term vectors) with
       *  other segments.  This is -1 if it does not share. */
      public int docStoreOffset = -1;

      /** String of the shared doc store segment, or null if
       *  this segment does not share the doc store files. */
      public String docStoreSegment;

      /** True if the shared doc store files are compound file format. */
      public boolean docStoreCompoundFile;

      /** True if this segment has pending deletions. */
      public boolean hasDeletions;

      /** Name of the current deletions file name. */
      public String deletionsFileName;

      /** Number of deleted documents. */
      public int numDeleted;

      /** True if we were able to open a SegmentReader on this segment. */
      public boolean openReaderPassed;

      /** Number of fields in this segment. */
      int numFields;

      /** True if at least one of the fields in this segment
       *  has position data.
       *  @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
      public boolean hasProx;

      /** Map that includes certain debugging details that
       *  IndexWriter records into each segment it creates. */
      public Map<String,String> diagnostics;

      /** Status for testing of field norms (null if field norms could not be tested). */
      public FieldNormStatus fieldNormStatus;

      /** Status for testing of indexed terms (null if indexed terms could not be tested). */
      public TermIndexStatus termIndexStatus;

      /** Status for testing of stored fields (null if stored fields could not be tested). */
      public StoredFieldStatus storedFieldStatus;

      /** Status for testing of term vectors (null if term vectors could not be tested). */
      public TermVectorStatus termVectorStatus;
    }

    /** Status from testing field norms. */
    public static final class FieldNormStatus {
      /** Number of fields successfully tested. */
      public long totFields = 0L;

      /** Exception thrown during field norm test (null on success). */
      public Throwable error = null;
    }

    /** Status from testing term index. */
    public static final class TermIndexStatus {
      /** Total term count. */
      public long termCount = 0L;

      /** Total frequency across all terms. */
      public long totFreq = 0L;

      /** Total number of positions. */
      public long totPos = 0L;

      /** Exception thrown during term index test (null on success). */
      public Throwable error = null;
    }

    /** Status from testing stored fields. */
    public static final class StoredFieldStatus {
      /** Number of documents tested. */
      public int docCount = 0;

      /** Total number of stored fields tested. */
      public long totFields = 0;

      /** Exception thrown during stored fields test (null on success). */
      public Throwable error = null;
    }

    /** Status from testing term vectors. */
    public static final class TermVectorStatus {
      /** Number of documents tested. */
      public int docCount = 0;

      /** Total number of term vectors tested. */
      public long totVectors = 0;

      /** Exception thrown during term vector test (null on success). */
      public Throwable error = null;
    }
  }

  /** Create a new CheckIndex on the directory. */
  public CheckIndex(Directory dir) {
    this.dir = dir;
    infoStream = null;
  }

  /** Set infoStream where messages should go.
If null, no * messages are printed */ public void setInfoStream(PrintStream out) { infoStream = out; } private void msg(String msg) { if (infoStream != null) infoStream.println(msg); } private static class MySegmentTermDocs extends SegmentTermDocs { int delCount; MySegmentTermDocs(SegmentReader p) { super(p,-1); } @Override public void seek(Term term) throws IOException { super.seek(term); delCount = 0; } @Override protected void skippingDoc() throws IOException { delCount++; } } /** Returns a {@link Status} instance detailing * the state of the index. * * <p>As this method checks every byte in the index, on a large * index it can take quite a long time to run. * * <p><b>WARNING</b>: make sure * you only call this when the index is not opened by any * writer. */ public Status checkIndex() throws IOException { return checkIndex(null); } /** Returns a {@link Status} instance detailing * the state of the index. * * @param onlySegments list of specific segment names to check * * <p>As this method checks every byte in the specified * segments, on a large index it can take quite a long * time to run. * * <p><b>WARNING</b>: make sure * you only call this when the index is not opened by any * writer. 
*/
  public Status checkIndex(List<String> onlySegments) throws IOException {
    NumberFormat nf = NumberFormat.getInstance();
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try {
      sis.read(dir);
    } catch (Throwable t) {
      // Cannot locate/parse any segments_N file: report and bail out early.
      msg("ERROR: could not read any segments file in directory");
      result.missingSegments = true;
      if (infoStream != null)
        t.printStackTrace(infoStream);
      return result;
    }

    // find the oldest and newest segment versions
    String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
    String oldSegs = null;
    boolean foundNonNullVersion = false;
    Comparator<String> versionComparator = StringHelper.getVersionComparator();
    for (SegmentInfo si : sis) {
      String version = si.getVersion();
      if (version == null) {
        // pre-3.1 segment
        oldSegs = "pre-3.1";
      } else if (version.equals("2.x")) {
        // an old segment that was 'touched' by 3.1+ code
        oldSegs = "2.x";
      } else {
        foundNonNullVersion = true;
        if (versionComparator.compare(version, oldest) < 0) {
          oldest = version;
        }
        if (versionComparator.compare(version, newest) > 0) {
          newest = version;
        }
      }
    }

    final int numSegments = sis.size();
    final String segmentsFileName = sis.getCurrentSegmentFileName();
    IndexInput input = null;
    try {
      input = dir.openInput(segmentsFileName);
    } catch (Throwable t) {
      msg("ERROR: could not open segments file in directory");
      if (infoStream != null)
        t.printStackTrace(infoStream);
      result.cantOpenSegments = true;
      return result;
    }
    int format = 0;
    try {
      // First int of segments_N encodes the on-disk format version.
      format = input.readInt();
    } catch (Throwable t) {
      msg("ERROR: could not read segment file version in directory");
      if (infoStream != null)
        t.printStackTrace(infoStream);
      result.missingSegmentVersion = true;
      return result;
    } finally {
      if (input != null)
        input.close();
    }

    String sFormat = "";
    boolean skip = false;

    // Map the format constant to a human-readable description; an index
    // written by a newer Lucene than this tool sets skip=true below.
    // NOTE(review): the first two checks are plain 'if's, not 'else if';
    // harmless as long as FORMAT and FORMAT_LOCKLESS are distinct constants
    // -- confirm against SegmentInfos.
    if (format == SegmentInfos.FORMAT)
      sFormat = "FORMAT [Lucene Pre-2.1]";
    if (format == SegmentInfos.FORMAT_LOCKLESS)
      sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
      sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
      sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    else {
      if (format == SegmentInfos.FORMAT_CHECKSUM)
        sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
      else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
      else if (format == SegmentInfos.FORMAT_HAS_PROX)
        sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
      else if (format == SegmentInfos.FORMAT_USER_DATA)
        sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
      else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
      else if (format == SegmentInfos.FORMAT_HAS_VECTORS)
        sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
      else if (format == SegmentInfos.FORMAT_3_1)
        sFormat = "FORMAT_3_1 [Lucene 3.1+]";
      else if (format == SegmentInfos.CURRENT_FORMAT)
        throw new RuntimeException("BUG: You should update this tool!");
      else if (format < SegmentInfos.CURRENT_FORMAT) {
        // Format written by a newer Lucene: we cannot safely check it.
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
      } else {
        sFormat = format + " [Lucene 1.3 or prior]";
      }
    }

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.getUserData();
    String userDataString;
    if (sis.getUserData().size() > 0) {
      userDataString = " userData=" + sis.getUserData();
    } else {
      userDataString = "";
    }

    String versionString = null;
    if (oldSegs != null) {
      if (foundNonNullVersion) {
        versionString = "versions=[" + oldSegs + " .. " + newest + "]";
      } else {
        versionString = "version=" + oldSegs;
      }
    } else {
      versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
    }

    msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

    if (onlySegments != null) {
      result.partial = true;
      if (infoStream != null)
        infoStream.print("\nChecking only these segments:");
      for (String s : onlySegments) {
        if (infoStream != null)
          infoStream.print(" " + s);
      }
      result.segmentsChecked.addAll(onlySegments);
      // Terminates the segment-name list started with print() above.
      msg(":");
    }

    if (skip) {
      msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
      result.toolOutOfDate = true;
      return result;
    }

    // newSegments collects only the segments that pass all checks; fixIndex
    // later commits it to drop the broken ones.
    result.newSegments = (SegmentInfos) sis.clone();
    result.newSegments.clear();
    result.maxSegmentName = -1;

    for(int i=0;i<numSegments;i++) {
      final SegmentInfo info = sis.info(i);
      // Segment names are "_" followed by a base-36 integer; track the max.
      int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
      if (segmentName > result.maxSegmentName) {
        result.maxSegmentName = segmentName;
      }
      if (onlySegments != null && !onlySegments.contains(info.name))
        continue;
      Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
      result.segmentInfos.add(segInfoStat);
      msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
      segInfoStat.name = info.name;
      segInfoStat.docCount = info.docCount;

      // Assume the whole segment is lost until the reader opens successfully,
      // after which only the live (non-deleted) docs are at stake.
      int toLoseDocCount = info.docCount;

      SegmentReader reader = null;

      try {
        msg(" compound=" + info.getUseCompoundFile());
        segInfoStat.compound = info.getUseCompoundFile();
        msg(" hasProx=" + info.getHasProx());
        segInfoStat.hasProx = info.getHasProx();
        msg(" numFiles=" + info.files().size());
        segInfoStat.numFiles = info.files().size();
        segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
        msg(" size (MB)=" + nf.format(segInfoStat.sizeMB));
        Map<String,String> diagnostics = info.getDiagnostics();
        segInfoStat.diagnostics = diagnostics;
        if (diagnostics.size() > 0) {
          msg(" diagnostics = " + diagnostics);
        }

        final int docStoreOffset = info.getDocStoreOffset();
        if (docStoreOffset != -1) {
          // This segment shares doc store files with other segments.
          msg(" docStoreOffset=" + docStoreOffset);
          segInfoStat.docStoreOffset = docStoreOffset;
          msg(" docStoreSegment=" + info.getDocStoreSegment());
          segInfoStat.docStoreSegment = info.getDocStoreSegment();
          msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
          segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
        }
        final String delFileName = info.getDelFileName();
        if (delFileName == null){
          msg(" no deletions");
          segInfoStat.hasDeletions = false;
        } else{
          msg(" has deletions [delFileName=" + delFileName + "]");
          segInfoStat.hasDeletions = true;
          segInfoStat.deletionsFileName = delFileName;
        }
        if (infoStream != null)
          infoStream.print(" test: open reader.........");
        reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

        segInfoStat.openReaderPassed = true;

        final int numDocs = reader.numDocs();
        toLoseDocCount = numDocs;
        if (reader.hasDeletions()) {
          // Cross-check the three sources of the deletion count: the
          // deleted-docs bit set, the SegmentInfo, and docCount - numDocs.
          if (reader.deletedDocs.count() != info.getDelCount()) {
            throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
          }
          if (reader.deletedDocs.count() > reader.maxDoc()) {
            throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
          }
          if (info.docCount - numDocs != info.getDelCount()){
            throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
          }
          segInfoStat.numDeleted = info.docCount - numDocs;
          msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
        } else {
          if (info.getDelCount() != 0) {
            throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
          }
          msg("OK");
        }
        if (reader.maxDoc() != info.docCount)
          throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);

        // Test getFieldNames()
        if (infoStream != null) {
          infoStream.print(" test: fields..............");
        }
        Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
        msg("OK [" + fieldNames.size() + " fields]");
        segInfoStat.numFields = fieldNames.size();

        // Test Field Norms
        segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);

        // Test the Term Index
        segInfoStat.termIndexStatus = testTermIndex(info, reader);

        // Test Stored Fields
        segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);

        // Test Term Vectors
        segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);

        // Rethrow the first exception we encountered
        // This will cause stats for failed segments to be incremented properly
        if (segInfoStat.fieldNormStatus.error != null) {
          throw new RuntimeException("Field Norm test failed");
        } else if (segInfoStat.termIndexStatus.error != null) {
          throw new RuntimeException("Term Index test failed");
        } else if (segInfoStat.storedFieldStatus.error != null) {
          throw new RuntimeException("Stored Field test failed");
        } else if (segInfoStat.termVectorStatus.error != null) {
          throw new RuntimeException("Term Vector test failed");
        }

        msg("");

      } catch (Throwable t) {
        // Any failure marks the whole segment bad; its live docs count as lost
        // and the segment is excluded from newSegments (via 'continue').
        msg("FAILED");
        String comment;
        comment = "fixIndex() would remove reference to this segment";
        msg(" WARNING: " + comment + "; full exception:");
        if (infoStream != null)
          t.printStackTrace(infoStream);
        msg("");
        result.totLoseDocCount += toLoseDocCount;
        result.numBadSegments++;
        continue;
      } finally {
        if (reader != null)
          reader.close();
      }

      // Keeper
      result.newSegments.add((SegmentInfo) info.clone());
    }

    if (0 == result.numBadSegments) {
      result.clean = true;
    } else
      msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

    // Verify the next-segment-name counter exceeds every existing segment name.
    if (!
(result.validCounter = (result.maxSegmentName < sis.counter))) {
      // Stale counter: repair it in newSegments and flag the index as dirty.
      result.clean = false;
      result.newSegments.counter = result.maxSegmentName + 1;
      msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
    }

    if (result.clean) {
      msg("No problems were detected with this index.\n");
    }

    return result;
  }

  /** Test field norms.
   *  @param fieldNames every field name present in this segment
   *  @param reader open reader on the segment under test
   *  @return status; {@code error} is non-null if the test failed */
  private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
    final Status.FieldNormStatus status = new Status.FieldNormStatus();

    try {
      // Test Field Norms
      if (infoStream != null) {
        infoStream.print(" test: field norms.........");
      }
      // Reusable buffer, one norm byte per document, filled per field below.
      final byte[] b = new byte[reader.maxDoc()];
      for (final String fieldName : fieldNames) {
        if (reader.hasNorms(fieldName)) {
          reader.norms(fieldName, b, 0);
          ++status.totFields;
        }
      }

      msg("OK [" + status.totFields + " fields]");
    } catch (Throwable e) {
      // Failures are recorded, not rethrown; the caller inspects status.error.
      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
      status.error = e;
      if (infoStream != null) {
        e.printStackTrace(infoStream);
      }
    }

    return status;
  }

  /**
   * Test the term index.
*/
  private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
    final Status.TermIndexStatus status = new Status.TermIndexStatus();

    // NOTE(review): 'is' is never closed; looks intentional since it only
    // wraps the caller-owned reader -- confirm IndexSearcher.close semantics.
    final IndexSearcher is = new IndexSearcher(reader);

    try {
      if (infoStream != null) {
        infoStream.print(" test: terms, freq, prox...");
      }

      final TermEnum termEnum = reader.terms();
      final TermPositions termPositions = reader.termPositions();

      // Used only to count up # deleted docs for this term
      final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

      final int maxDoc = reader.maxDoc();
      Term lastTerm = null;
      while (termEnum.next()) {
        status.termCount++;
        final Term term = termEnum.term();
        lastTerm = term;
        final int docFreq = termEnum.docFreq();
        if (docFreq <= 0) {
          throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
        }
        termPositions.seek(term);
        int lastDoc = -1;
        int freq0 = 0;
        status.totFreq += docFreq;
        // Walk every posting for this term, verifying doc ids are strictly
        // increasing and in range, freqs are positive, and positions are
        // non-decreasing.
        while (termPositions.next()) {
          freq0++;
          final int doc = termPositions.doc();
          final int freq = termPositions.freq();
          if (doc <= lastDoc)
            throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
          if (doc >= maxDoc)
            throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
          lastDoc = doc;
          if (freq <= 0)
            throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
          int lastPos = -1;
          status.totPos += freq;
          for(int j=0;j<freq;j++) {
            final int pos = termPositions.nextPosition();
            if (pos < -1)
              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
            if (pos < lastPos)
              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
            lastPos = pos;
          }
        }

        // Test skipping: re-seek and skip to 7 evenly spaced target docs,
        // checking skipTo never lands before the target and next() advances.
        for(int idx=0;idx<7;idx++) {
          final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
          termPositions.seek(term);
          if (!termPositions.skipTo(skipDocID)) {
            break;
          } else {
            final int docID = termPositions.doc();
            if (docID < skipDocID) {
              throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + ") returned docID=" + docID);
            }
            final int freq = termPositions.freq();
            if (freq <= 0) {
              throw new RuntimeException("termFreq " + freq + " is out of bounds");
            }
            int lastPosition = -1;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              final int pos = termPositions.nextPosition();
              if (pos < 0) {
                throw new RuntimeException("position " + pos + " is out of bounds");
              }
              // TODO: we should assert when all pos == 0 that positions are actually omitted
              if (pos < lastPosition) {
                throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
              }
              lastPosition = pos;
            }
            if (!termPositions.next()) {
              break;
            }
            final int nextDocID = termPositions.doc();
            if (nextDocID <= docID) {
              throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
            }
          }
        }

        // Now count how many deleted docs occurred in
        // this term:
        final int delCount;
        if (reader.hasDeletions()) {
          myTermDocs.seek(term);
          while(myTermDocs.next()) { }
          delCount = myTermDocs.delCount;
        } else {
          delCount = 0;
        }

        // docFreq counts deleted docs too; live postings + deleted must match.
        if (freq0 + delCount != docFreq) {
          throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
        }
      }

      // Test search on last term:
      if (lastTerm != null) {
        is.search(new TermQuery(lastTerm), 1);
      }

      try {
        long uniqueTermCountAllFields = reader.getUniqueTermCount();
        if (status.termCount != uniqueTermCountAllFields) {
          throw new RuntimeException("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.termCount));
        }
      } catch (UnsupportedOperationException ex) {
        // not supported
      }

      msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    } catch (Throwable e) {
      // Failures are recorded, not rethrown; the caller inspects status.error.
      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
      status.error = e;
      if (infoStream != null) {
        e.printStackTrace(infoStream);
      }
    }

    return status;
  }

  /**
   * Test stored
fields for a segment. */ private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) { final Status.StoredFieldStatus status = new Status.StoredFieldStatus(); try { if (infoStream != null) { infoStream.print(" test: stored fields......."); } // Scan stored fields for all documents for (int j = 0; j < info.docCount; ++j) { if (!reader.isDeleted(j)) { status.docCount++; Document doc = reader.document(j); status.totFields += doc.getFields().size(); } } // Validate docCount if (status.docCount != reader.numDocs()) { throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs"); } msg("OK [" + status.totFields + " total field count; avg " + format.format((((float) status.totFields)/status.docCount)) + " fields per doc]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** * Test term vectors for a segment. */ private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) { final Status.TermVectorStatus status = new Status.TermVectorStatus(); try { if (infoStream != null) { infoStream.print(" test: term vectors........"); } for (int j = 0; j < info.docCount; ++j) { if (!reader.isDeleted(j)) { status.docCount++; TermFreqVector[] tfv = reader.getTermFreqVectors(j); if (tfv != null) { status.totVectors += tfv.length; } } } msg("OK [" + status.totVectors + " total vector count; avg " + format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** Repairs the index using previously returned result * from {@link #checkIndex}. 
Note that this does not * remove any of the unreferenced files after it's done; * you must separately open an {@link IndexWriter}, which * deletes unreferenced files when it's created. * * <p><b>WARNING</b>: this writes a * new segments file into the index, effectively removing * all documents in broken segments from the index. * BE CAREFUL. * * <p><b>WARNING</b>: Make sure you only call this when the * index is not opened by any writer. */ public void fixIndex(Status result) throws IOException { if (result.partial) throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); result.newSegments.changed(); result.newSegments.commit(result.dir); } private static boolean assertsOn; private static boolean testAsserts() { assertsOn = true; return true; } private static boolean assertsOn() { assert testAsserts(); return assertsOn; } /** Command-line interface to check and fix an index. <p> Run it like this: <pre> java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] </pre> <ul> <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments <li><code>-segment X</code>: only check the specified segment(s). This can be specified multiple times, to check more than one segment, eg <code>-segment _2 -segment _a</code>. You can't use this with the -fix option. </ul> <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause documents (perhaps many) to be permanently removed from the index. Always make a backup copy of your index before running this! Do not run this tool on an index that is actively being written to. You have been warned! <p> Run without -fix, this tool will open the index, report version information and report any exceptions it hits and what action it would take if -fix were specified. 
    With -fix, this tool will remove any segments that have issues and
    write a new segments_N file.  This means all documents contained in the
    affected segments will be removed.

    <p>
    This tool exits with exit code 1 if the index cannot be opened or has any
    corruption, else 0.
  */
  public static void main(String[] args) throws IOException, InterruptedException {

    boolean doFix = false;
    List<String> onlySegments = new ArrayList<String>();
    String indexPath = null;
    int i = 0;
    // Parse flags; any non-flag argument is taken as the (single) index path.
    while(i < args.length) {
      if (args[i].equals("-fix")) {
        doFix = true;
        i++;
      } else if (args[i].equals("-segment")) {
        if (i == args.length-1) {
          System.out.println("ERROR: missing name for -segment option");
          System.exit(1);
        }
        onlySegments.add(args[i+1]);
        i += 2;
      } else {
        if (indexPath != null) {
          System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
          System.exit(1);
        }
        indexPath = args[i];
        i++;
      }
    }

    if (indexPath == null) {
      System.out.println("\nERROR: index path not specified");
      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
                         "\n" +
                         " -fix: actually write a new segments_N file, removing any problematic segments\n" +
                         " -segment X: only check the specified segments. This can be specified multiple\n" +
                         " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
                         " You can't use this with the -fix option\n" +
                         "\n" +
                         "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
                         "documents (perhaps many) to be permanently removed from the index. Always make\n" +
                         "a backup copy of your index before running this! Do not run this tool on an index\n" +
                         "that is actively being written to. You have been warned!\n" +
                         "\n" +
                         "Run without -fix, this tool will open the index, report version information\n" +
                         "and report any exceptions it hits and what action it would take if -fix were\n" +
                         "specified. With -fix, this tool will remove any segments that have issues and\n" +
                         "write a new segments_N file. This means all documents contained in the affected\n" +
                         "segments will be removed.\n" +
                         "\n" +
                         "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
                         "corruption, else 0.\n");
      System.exit(1);
    }

    if (!assertsOn())
      System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

    if (onlySegments.size() == 0)
      onlySegments = null;
    else if (doFix) {
      // -segment implies a partial check, which fixIndex refuses.
      System.out.println("ERROR: cannot specify both -fix and -segment");
      System.exit(1);
    }

    System.out.println("\nOpening index @ " + indexPath + "\n");
    Directory dir = null;
    try {
      dir = FSDirectory.open(new File(indexPath));
    } catch (Throwable t) {
      System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
      t.printStackTrace(System.out);
      System.exit(1);
    }

    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);

    Status result = checker.checkIndex(onlySegments);
    if (result.missingSegments) {
      System.exit(1);
    }

    if (!result.clean) {
      if (!doFix) {
        System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
      } else {
        // Give the operator a final window to abort before data is dropped.
        System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
        System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
        for(int s=0;s<5;s++) {
          Thread.sleep(1000);
          System.out.println(" " + (5-s) + "...");
        }
        System.out.println("Writing...");
        checker.fixIndex(result);
        System.out.println("OK");
        System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
      }
    }
    System.out.println("");

    final int exitCode;
    if (result.clean == true)
      exitCode = 0;
    else
      exitCode = 1;
    System.exit(exitCode);
  }
}