package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import java.text.NumberFormat; import java.io.PrintStream; import java.io.IOException; import java.io.File; import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.ArrayList; import java.util.Map; /** * Basic tool and API to check the health of an index and * write a new segments file that removes reference to * problematic segments. * * <p>As this tool checks every byte in the index, on a large * index it can take quite a long time to run. * * @lucene.experimental Please make a complete backup of your * index before using this to fix your index! */ public class CheckIndex { private PrintStream infoStream; private Directory dir; /** * Returned from {@link #checkIndex()} detailing the health and status of the index. * * @lucene.experimental **/ public static class Status { /** True if no problems were found with the index. */ public boolean clean; /** True if we were unable to locate and load the segments_N file. */ public boolean missingSegments; /** True if we were unable to open the segments_N file. */ public boolean cantOpenSegments; /** True if we were unable to read the version number from segments_N file. */ public boolean missingSegmentVersion; /** Name of latest segments_N file in the index. */ public String segmentsFileName; /** Number of segments in the index. */ public int numSegments; /** String description of the version of the index. */ public String segmentFormat; /** Empty unless you passed specific segments list to check as optional 3rd argument. * @see CheckIndex#checkIndex(List) */ public List<String> segmentsChecked = new ArrayList<String>(); /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */ public boolean toolOutOfDate; /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */ public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>(); /** Directory index is in. */ public Directory dir; /** * SegmentInfos instance containing only segments that * had no problems (this is used with the {@link CheckIndex#fixIndex} * method to repair the index. */ SegmentInfos newSegments; /** How many documents will be lost to bad segments. */ public int totLoseDocCount; /** How many bad segments were found. */ public int numBadSegments; /** True if we checked only specific segments ({@link * #checkIndex(List)}) was called with non-null * argument). */ public boolean partial; /** Holds the userData of the last commit in the index */ public Map<String, String> userData; /** Holds the status of each segment in the index. * See {@link #segmentInfos}. * * <p><b>WARNING</b>: this API is new and experimental and is * subject to suddenly change in the next release. */ public static class SegmentInfoStatus { /** Name of the segment. */ public String name; /** Name of codec used to read this segment. */ public String codec; /** Document count (does not take deletions into account). */ public int docCount; /** True if segment is compound file format. */ public boolean compound; /** Number of files referenced by this segment. */ public int numFiles; /** Net size (MB) of the files referenced by this * segment. */ public double sizeMB; /** Doc store offset, if this segment shares the doc * store files (stored fields and term vectors) with * other segments. This is -1 if it does not share. */ public int docStoreOffset = -1; /** String of the shared doc store segment, or null if * this segment does not share the doc store files. */ public String docStoreSegment; /** True if the shared doc store files are compound file * format. */ public boolean docStoreCompoundFile; /** True if this segment has pending deletions. */ public boolean hasDeletions; /** Name of the current deletions file name. */ public String deletionsFileName; /** Number of deleted documents. */ public int numDeleted; /** True if we were able to open a SegmentReader on this * segment. */ public boolean openReaderPassed; /** Number of fields in this segment. */ int numFields; /** True if at least one of the fields in this segment * does not omitTermFreqAndPositions. * @see AbstractField#setOmitTermFreqAndPositions */ public boolean hasProx; /** Map that includes certain * debugging details that IndexWriter records into * each segment it creates */ public Map<String,String> diagnostics; /** Status for testing of field norms (null if field norms could not be tested). */ public FieldNormStatus fieldNormStatus; /** Status for testing of indexed terms (null if indexed terms could not be tested). */ public TermIndexStatus termIndexStatus; /** Status for testing of stored fields (null if stored fields could not be tested). */ public StoredFieldStatus storedFieldStatus; /** Status for testing of term vectors (null if term vectors could not be tested). */ public TermVectorStatus termVectorStatus; } /** * Status from testing field norms. */ public static final class FieldNormStatus { /** Number of fields successfully tested */ public long totFields = 0L; /** Exception thrown during term index test (null on success) */ public Throwable error = null; } /** * Status from testing term index. */ public static final class TermIndexStatus { /** Total term count */ public long termCount = 0L; /** Total frequency across all terms. */ public long totFreq = 0L; /** Total number of positions. */ public long totPos = 0L; /** Exception thrown during term index test (null on success) */ public Throwable error = null; } /** * Status from testing stored fields. */ public static final class StoredFieldStatus { /** Number of documents tested. */ public int docCount = 0; /** Total number of stored fields tested. */ public long totFields = 0; /** Exception thrown during stored fields test (null on success) */ public Throwable error = null; } /** * Status from testing stored fields. */ public static final class TermVectorStatus { /** Number of documents tested. */ public int docCount = 0; /** Total number of term vectors tested. */ public long totVectors = 0; /** Exception thrown during term vector test (null on success) */ public Throwable error = null; } } /** Create a new CheckIndex on the directory. */ public CheckIndex(Directory dir) { this.dir = dir; infoStream = null; } /** Set infoStream where messages should go. If null, no * messages are printed */ public void setInfoStream(PrintStream out) { infoStream = out; } private void msg(String msg) { if (infoStream != null) infoStream.println(msg); } /** Returns a {@link Status} instance detailing * the state of the index. * * <p>As this method checks every byte in the index, on a large * index it can take quite a long time to run. * * <p><b>WARNING</b>: make sure * you only call this when the index is not opened by any * writer. */ public Status checkIndex() throws IOException { return checkIndex(null); } public Status checkIndex(List<String> onlySegments) throws IOException { return checkIndex(onlySegments, CodecProvider.getDefault()); } /** Returns a {@link Status} instance detailing * the state of the index. * * @param onlySegments list of specific segment names to check * * <p>As this method checks every byte in the specified * segments, on a large index it can take quite a long * time to run. * * <p><b>WARNING</b>: make sure * you only call this when the index is not opened by any * writer. */ public Status checkIndex(List<String> onlySegments, CodecProvider codecs) throws IOException { NumberFormat nf = NumberFormat.getInstance(); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.read(dir, codecs); } catch (Throwable t) { msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) t.printStackTrace(infoStream); return result; } final int numSegments = sis.size(); final String segmentsFileName = sis.getCurrentSegmentFileName(); IndexInput input = null; try { input = dir.openInput(segmentsFileName); } catch (Throwable t) { msg("ERROR: could not open segments file in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.readInt(); } catch (Throwable t) { msg("ERROR: could not read segment file version in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.close(); } String sFormat = ""; boolean skip = false; if (format == DefaultSegmentInfosWriter.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format == DefaultSegmentInfosWriter.FORMAT_4_0) { sFormat = "FORMAT_4_0 [Lucene 4.0]"; } else if (format < DefaultSegmentInfosWriter.FORMAT_CURRENT) { sFormat = "int=" + format + " [newer version of Lucene than this tool supports]"; skip = true; } else if (format > DefaultSegmentInfosWriter.FORMAT_MINIMUM) { sFormat = "int=" + format + " [older version of Lucene than this tool supports]"; skip = true; } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.getUserData(); String userDataString; if (sis.getUserData().size() > 0) { userDataString = " userData=" + sis.getUserData(); } else { userDataString = ""; } msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.print("\nChecking only these segments:"); for (String s : onlySegments) { if (infoStream != null) infoStream.print(" " + s); } result.segmentsChecked.addAll(onlySegments); msg(":"); } if (skip) { msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.clone(); result.newSegments.clear(); for(int i=0;i<numSegments;i++) { final SegmentInfo info = sis.info(i); if (onlySegments != null && !onlySegments.contains(info.name)) continue; Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.add(segInfoStat); msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { final String codec = info.getCodec().name; msg(" codec=" + codec); segInfoStat.codec = codec; msg(" compound=" + info.getUseCompoundFile()); segInfoStat.compound = info.getUseCompoundFile(); msg(" hasProx=" + info.getHasProx()); segInfoStat.hasProx = info.getHasProx(); msg(" numFiles=" + info.files().size()); segInfoStat.numFiles = info.files().size(); msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.))); segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); Map<String,String> diagnostics = info.getDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.size() > 0) { msg(" diagnostics = " + diagnostics); } final int docStoreOffset = info.getDocStoreOffset(); if (docStoreOffset != -1) { msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; msg(" docStoreSegment=" + info.getDocStoreSegment()); segInfoStat.docStoreSegment = info.getDocStoreSegment(); msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); } final String delFileName = info.getDelFileName(); if (delFileName == null){ msg(" no deletions"); segInfoStat.hasDeletions = false; } else{ msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.print(" test: open reader........."); reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); segInfoStat.openReaderPassed = true; final int numDocs = reader.numDocs(); toLoseDocCount = numDocs; if (reader.hasDeletions()) { if (reader.deletedDocs.count() != info.getDelCount()) { throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count()); } if (reader.deletedDocs.count() > reader.maxDoc()) { throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count()); } if (info.docCount - numDocs != info.getDelCount()){ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.getDelCount() != 0) { throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); } msg("OK"); } if (reader.maxDoc() != info.docCount) throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount); // Test getFieldNames() if (infoStream != null) { infoStream.print(" test: fields.............."); } Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); msg("OK [" + fieldNames.size() + " fields]"); segInfoStat.numFields = fieldNames.size(); // Test Field Norms segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = testTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new RuntimeException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new RuntimeException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new RuntimeException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new RuntimeException("Term Vector test failed"); } msg(""); } catch (Throwable t) { msg("FAILED"); String comment; comment = "fixIndex() would remove reference to this segment"; msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) t.printStackTrace(infoStream); msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.close(); } // Keeper result.newSegments.add((SegmentInfo) info.clone()); } if (0 == result.numBadSegments) { result.clean = true; msg("No problems were detected with this index.\n"); } else msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; } /** * Test field norms. */ private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) { final Status.FieldNormStatus status = new Status.FieldNormStatus(); try { // Test Field Norms if (infoStream != null) { infoStream.print(" test: field norms........."); } final byte[] b = new byte[reader.maxDoc()]; for (final String fieldName : fieldNames) { reader.norms(fieldName, b, 0); ++status.totFields; } msg("OK [" + status.totFields + " fields]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** * Test the term index. */ private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); final int maxDoc = reader.maxDoc(); final Bits delDocs = reader.getDeletedDocs(); try { if (infoStream != null) { infoStream.print(" test: terms, freq, prox..."); } final Fields fields = reader.fields(); if (fields == null) { msg("OK [no fields/terms]"); return status; } final FieldsEnum fieldsEnum = fields.iterator(); while(true) { final String field = fieldsEnum.next(); if (field == null) { break; } final TermsEnum terms = fieldsEnum.terms(); DocsEnum docs = null; DocsAndPositionsEnum postings = null; boolean hasOrd = true; final long termCountStart = status.termCount; BytesRef lastTerm = null; Comparator<BytesRef> termComp = terms.getComparator(); while(true) { final BytesRef term = terms.next(); if (term == null) { break; } // make sure terms arrive in order according to // the comp if (lastTerm == null) { lastTerm = new BytesRef(term); } else { if (termComp.compare(lastTerm, term) >= 0) { throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term); } lastTerm.copy(term); } final int docFreq = terms.docFreq(); status.totFreq += docFreq; docs = terms.docs(delDocs, docs); postings = terms.docsAndPositions(delDocs, postings); if (hasOrd) { long ord = -1; try { ord = terms.ord(); } catch (UnsupportedOperationException uoe) { hasOrd = false; } if (hasOrd) { final long ordExpected = status.termCount - termCountStart; if (ord != ordExpected) { throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); } } } status.termCount++; final DocsEnum docs2; if (postings != null) { docs2 = postings; } else { docs2 = docs; } int lastDoc = -1; while(true) { final int doc = docs2.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } final int freq = docs2.freq(); status.totPos += freq; if (doc <= lastDoc) { throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); } if (doc >= maxDoc) { throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); } lastDoc = doc; if (freq <= 0) { throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = -1; if (postings != null) { for(int j=0;j<freq;j++) { final int pos = postings.nextPosition(); if (pos < -1) { throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos < lastPos) { throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } lastPos = pos; if (postings.hasPayload()) { postings.getPayload(); } } } } // Now count how many deleted docs occurred in // this term: if (reader.hasDeletions()) { final DocsEnum docsNoDel = terms.docs(null, docs); int count = 0; while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { count++; } if (count != docFreq) { throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + count); } } } } msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** * Test stored fields for a segment. */ private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) { final Status.StoredFieldStatus status = new Status.StoredFieldStatus(); try { if (infoStream != null) { infoStream.print(" test: stored fields......."); } // Scan stored fields for all documents final Bits delDocs = reader.getDeletedDocs(); for (int j = 0; j < info.docCount; ++j) { if (delDocs == null || !delDocs.get(j)) { status.docCount++; Document doc = reader.document(j); status.totFields += doc.getFields().size(); } } // Validate docCount if (status.docCount != reader.numDocs()) { throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs"); } msg("OK [" + status.totFields + " total field count; avg " + format.format((((float) status.totFields)/status.docCount)) + " fields per doc]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** * Test term vectors for a segment. */ private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) { final Status.TermVectorStatus status = new Status.TermVectorStatus(); try { if (infoStream != null) { infoStream.print(" test: term vectors........"); } final Bits delDocs = reader.getDeletedDocs(); for (int j = 0; j < info.docCount; ++j) { if (delDocs == null || !delDocs.get(j)) { status.docCount++; TermFreqVector[] tfv = reader.getTermFreqVectors(j); if (tfv != null) { status.totVectors += tfv.length; } } } msg("OK [" + status.totVectors + " total vector count; avg " + format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]"); } catch (Throwable e) { msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); } } return status; } /** Repairs the index using previously returned result * from {@link #checkIndex}. Note that this does not * remove any of the unreferenced files after it's done; * you must separately open an {@link IndexWriter}, which * deletes unreferenced files when it's created. * * <p><b>WARNING</b>: this writes a * new segments file into the index, effectively removing * all documents in broken segments from the index. * BE CAREFUL. * * <p><b>WARNING</b>: Make sure you only call this when the * index is not opened by any writer. */ public void fixIndex(Status result) throws IOException { if (result.partial) throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); result.newSegments.commit(result.dir); } private static boolean assertsOn; private static boolean testAsserts() { assertsOn = true; return true; } private static boolean assertsOn() { assert testAsserts(); return assertsOn; } /** Command-line interface to check and fix an index. <p> Run it like this: <pre> java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] </pre> <ul> <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments <li><code>-segment X</code>: only check the specified segment(s). This can be specified multiple times, to check more than one segment, eg <code>-segment _2 -segment _a</code>. You can't use this with the -fix option. </ul> <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause documents (perhaps many) to be permanently removed from the index. Always make a backup copy of your index before running this! Do not run this tool on an index that is actively being written to. You have been warned! <p> Run without -fix, this tool will open the index, report version information and report any exceptions it hits and what action it would take if -fix were specified. With -fix, this tool will remove any segments that have issues and write a new segments_N file. This means all documents contained in the affected segments will be removed. <p> This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. */ public static void main(String[] args) throws IOException, InterruptedException { boolean doFix = false; List<String> onlySegments = new ArrayList<String>(); String indexPath = null; int i = 0; while(i < args.length) { if (args[i].equals("-fix")) { doFix = true; i++; } else if (args[i].equals("-segment")) { if (i == args.length-1) { System.out.println("ERROR: missing name for -segment option"); System.exit(1); } onlySegments.add(args[i+1]); i += 2; } else { if (indexPath != null) { System.out.println("ERROR: unexpected extra argument '" + args[i] + "'"); System.exit(1); } indexPath = args[i]; i++; } } if (indexPath == null) { System.out.println("\nERROR: index path not specified"); System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n"); System.exit(1); } if (!assertsOn()) System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled"); if (onlySegments.size() == 0) onlySegments = null; else if (doFix) { System.out.println("ERROR: cannot specify both -fix and -segment"); System.exit(1); } System.out.println("\nOpening index @ " + indexPath + "\n"); Directory dir = null; try { dir = FSDirectory.open(new File(indexPath)); } catch (Throwable t) { System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting"); t.printStackTrace(System.out); System.exit(1); } CheckIndex checker = new CheckIndex(dir); checker.setInfoStream(System.out); Status result = checker.checkIndex(onlySegments); if (result.missingSegments) { System.exit(1); } if (!result.clean) { if (!doFix) { System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); } else { System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for(int s=0;s<5;s++) { Thread.sleep(1000); System.out.println(" " + (5-s) + "..."); } System.out.println("Writing..."); checker.fixIndex(result); System.out.println("OK"); System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\""); } } System.out.println(""); final int exitCode; if (result.clean == true) exitCode = 0; else exitCode = 1; System.exit(exitCode); } }