/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tests for IndexWriter when the disk runs out of space
 */
public class TestIndexWriterOnDiskFull extends LuceneTestCase {

  /*
   * Make sure IndexWriter cleans up on hitting a disk
   * full exception in addDocument.
   * TODO: how to do this on windows with FSDirectory?
   */
  public void testAddDocumentOnDiskFull() throws IOException {

    for(int pass=0;pass<2;pass++) {
      if (VERBOSE) {
        System.out.println("TEST: pass=" + pass);
      }
      boolean doAbort = pass == 1;
      long diskFree = TestUtil.nextInt(random(), 100, 300);
      boolean indexExists = false;
      while(true) {
        if (VERBOSE) {
          System.out.println("TEST: cycle: diskFree=" + diskFree);
        }
        MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory());
        dir.setMaxSizeInBytes(diskFree);
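        // MockDirectoryWrapper enforces the cap set by setMaxSizeInBytes by
        // throwing a fake "disk full" IOException once writes would push the
        // directory past that size; that is the failure this test provokes.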
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
        MergeScheduler ms = writer.getConfig().getMergeScheduler();
        if (ms instanceof ConcurrentMergeScheduler) {
          // This test intentionally produces exceptions
          // in the threads that CMS launches; we don't
          // want to pollute test output with these.
          ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
        }

        boolean hitError = false;
        try {
          for(int i=0;i<200;i++) {
            addDoc(writer);
          }
          if (VERBOSE) {
            System.out.println("TEST: done adding docs; now commit");
          }
          writer.commit();
          indexExists = true;
        } catch (IOException e) {
          if (VERBOSE) {
            System.out.println("TEST: exception on addDoc");
            e.printStackTrace(System.out);
          }
          hitError = true;
        }

        if (hitError) {
          if (doAbort) {
            if (VERBOSE) {
              System.out.println("TEST: now rollback");
            }
            writer.rollback();
          } else {
            try {
              if (VERBOSE) {
                System.out.println("TEST: now close");
              }
              writer.close();
            } catch (IOException e) {
              if (VERBOSE) {
                System.out.println("TEST: exception on close; retry w/ no disk space limit");
                e.printStackTrace(System.out);
              }
              dir.setMaxSizeInBytes(0);
              try {
                writer.close();
              } catch (AlreadyClosedException ace) {
                // OK
              }
            }
          }

          //TestUtil.syncConcurrentMerges(ms);

          if (indexExists) {
            // Make sure reader can open the index:
            DirectoryReader.open(dir).close();
          }

          dir.close();

          // Now try again w/ more space:
          diskFree += TEST_NIGHTLY ? TestUtil.nextInt(random(), 400, 600) : TestUtil.nextInt(random(), 3000, 5000);
        } else {
          //TestUtil.syncConcurrentMerges(writer);
          dir.setMaxSizeInBytes(0);
          writer.close();
          dir.close();
          break;
        }
      }
    }
  }

  // TODO: make @Nightly variant that provokes more disk
  // fulls

  // TODO: have test fail if on any given top
  // iter there was not a single IOE hit

  /*
   * Test: make sure when we run out of disk space or hit
   * random IOExceptions in any of the addIndexes(*) calls
   * that 1) index is not corrupt (searcher can open/search
   * it) and 2) transactional semantics are followed: either
   * all or none of the incoming documents were in fact
   * added.
   */
  public void testAddIndexOnDiskFull() throws IOException {
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):
    final String idFormat = TestUtil.getPostingsFormat("id");
    final String contentFormat = TestUtil.getPostingsFormat("content");
    assumeFalse("This test cannot run with Memory codec",
                idFormat.equals("Memory") || contentFormat.equals("Memory"));

    int START_COUNT = 57;
    int NUM_DIR = TEST_NIGHTLY ? 50 : 5;
    int DOCS_PER_DIR = TEST_NIGHTLY ? 25 : 5;
    int END_COUNT = START_COUNT + NUM_DIR*DOCS_PER_DIR;

    // Build up a bunch of dirs that have indexes which we
    // will then merge together by calling addIndexes(*):
    Directory[] dirs = new Directory[NUM_DIR];
    long inputDiskUsage = 0;
    for(int i=0;i<NUM_DIR;i++) {
      dirs[i] = newDirectory();
      IndexWriter writer = new IndexWriter(dirs[i], newIndexWriterConfig(new MockAnalyzer(random())));
      for(int j=0;j<DOCS_PER_DIR;j++) {
        addDocWithIndex(writer, 25*i+j);
      }
      writer.close();
      String[] files = dirs[i].listAll();
      for(int j=0;j<files.length;j++) {
        inputDiskUsage += dirs[i].fileLength(files[j]);
      }
    }
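    // inputDiskUsage (the combined size of all source indexes) is used at the
    // end of this test to assert that temporary disk usage during addIndexes
    // stays under 2X the total input size.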
    // Now, build a starting index that has START_COUNT docs.  We
    // will then try to addIndexes into a copy of this:
    MockDirectoryWrapper startDir = newMockDirectory();
    IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(new MockAnalyzer(random())));
    for(int j=0;j<START_COUNT;j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    // Make sure starting index seems to be working properly:
    Term searchTerm = new Term("content", "aaa");
    IndexReader reader = DirectoryReader.open(startDir);
    assertEquals("first docFreq", 57, reader.docFreq(searchTerm));

    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    assertEquals("first number of hits", 57, hits.length);
    reader.close();

    // Iterate with larger and larger amounts of free
    // disk space.  With little free disk space,
    // addIndexes will certainly run out of space &
    // fail.  Verify that when this happens, index is
    // not corrupt and index in fact has added no
    // documents.  Then, we increase free disk space
    // each iteration.  At some point there is
    // enough free disk space and addIndexes should
    // succeed and index should show all documents were
    // added.
    long diskUsage = startDir.sizeInBytes();

    long startDiskUsage = 0;
    String[] files = startDir.listAll();
    for(int i=0;i<files.length;i++) {
      startDiskUsage += startDir.fileLength(files[i]);
    }

    for(int iter=0;iter<3;iter++) {

      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      // Start with 50-200 bytes more than we are currently using:
      long diskFree = diskUsage + TestUtil.nextInt(random(), 50, 200);

      int method = iter;

      boolean success = false;
      boolean done = false;

      String methodName;
      if (0 == method) {
        methodName = "addIndexes(Directory[]) + forceMerge(1)";
      } else if (1 == method) {
        methodName = "addIndexes(IndexReader[])";
      } else {
        methodName = "addIndexes(Directory[])";
      }

      while(!done) {
        if (VERBOSE) {
          System.out.println("TEST: cycle...");
        }

        // Make a new dir that will enforce disk usage:
        MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(startDir));
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
                                  .setOpenMode(OpenMode.APPEND)
                                  .setMergePolicy(newLogMergePolicy(false));
        writer = new IndexWriter(dir, iwc);
        Exception err = null;

        for(int x=0;x<2;x++) {
          MergeScheduler ms = writer.getConfig().getMergeScheduler();
          if (ms instanceof ConcurrentMergeScheduler) {
            // This test intentionally produces exceptions
            // in the threads that CMS launches; we don't
            // want to pollute test output with these.
            if (0 == x) {
              ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
            } else {
              ((ConcurrentMergeScheduler) ms).clearSuppressExceptions();
            }
          }

          // Two loops: first time, limit disk space &
          // throw random IOExceptions; second time, no
          // disk space limit:
          double rate = 0.05;
          double diskRatio = ((double) diskFree)/diskUsage;
          long thisDiskFree;

          String testName = null;

          if (0 == x) {
            dir.setRandomIOExceptionRateOnOpen(random().nextDouble()*0.01);
            thisDiskFree = diskFree;
            if (diskRatio >= 2.0) {
              rate /= 2;
            }
            if (diskRatio >= 4.0) {
              rate /= 2;
            }
            if (diskRatio >= 6.0) {
              rate = 0.0;
            }
            if (VERBOSE) {
              testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
            }
          } else {
            dir.setRandomIOExceptionRateOnOpen(0.0);
            thisDiskFree = 0;
            rate = 0.0;
            if (VERBOSE) {
              testName = "disk full test " + methodName + " with unlimited disk space";
            }
          }

          if (VERBOSE) {
            System.out.println("\ncycle: " + testName);
          }

          dir.setTrackDiskUsage(true);
          dir.setMaxSizeInBytes(thisDiskFree);
          dir.setRandomIOExceptionRate(rate);

          try {

            if (0 == method) {
              if (VERBOSE) {
                System.out.println("TEST: now addIndexes count=" + dirs.length);
              }
              writer.addIndexes(dirs);
              if (VERBOSE) {
                System.out.println("TEST: now forceMerge");
              }
              writer.forceMerge(1);
            } else if (1 == method) {
              DirectoryReader[] readers = new DirectoryReader[dirs.length];
              for(int i=0;i<dirs.length;i++) {
                readers[i] = DirectoryReader.open(dirs[i]);
              }
              try {
                TestUtil.addIndexesSlowly(writer, readers);
              } finally {
                for(int i=0;i<dirs.length;i++) {
                  readers[i].close();
                }
              }
            } else {
              writer.addIndexes(dirs);
            }

            success = true;
            if (VERBOSE) {
              System.out.println(" success!");
            }

            if (0 == x) {
              done = true;
            }

          } catch (IllegalStateException | IOException e) {
            success = false;
            err = e;
            if (VERBOSE) {
              System.out.println(" hit Exception: " + e);
              e.printStackTrace(System.out);
            }

            if (1 == x) {
              e.printStackTrace(System.out);
              fail(methodName + " hit IOException after disk space was freed up");
            }
          }

          if (x == 1) {
            // Make sure all threads from ConcurrentMergeScheduler are done
            TestUtil.syncConcurrentMerges(writer);
          } else {
            dir.setRandomIOExceptionRateOnOpen(0.0);
            writer.rollback();
            writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                            .setOpenMode(OpenMode.APPEND)
                                            .setMergePolicy(newLogMergePolicy(false)));
          }

          if (VERBOSE) {
            System.out.println(" now test readers");
          }

          // Finally, verify index is not corrupt, and, if
          // we succeeded, we see all docs added, and if we
          // failed, we see either all docs or no docs added
          // (transactional semantics):
          dir.setRandomIOExceptionRateOnOpen(0.0);
          try {
            reader = DirectoryReader.open(dir);
          } catch (IOException e) {
            e.printStackTrace(System.out);
            fail(testName + ": exception when creating IndexReader: " + e);
          }
          int result = reader.docFreq(searchTerm);
          if (success) {
            if (result != START_COUNT) {
              fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result != START_COUNT && result != END_COUNT) {
              err.printStackTrace(System.out);
              fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
            }
          }

          searcher = newSearcher(reader);
          try {
            hits = searcher.search(new TermQuery(searchTerm), END_COUNT).scoreDocs;
          } catch (IOException e) {
            e.printStackTrace(System.out);
            fail(testName + ": exception when searching: " + e);
          }
          int result2 = hits.length;
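          // Every doc's content field contains the term 'aaa' (see
          // addDocWithIndex), so hits.length should always agree with the
          // docFreq just computed: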
          if (success) {
            if (result2 != result) {
              fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result2 != result) {
              err.printStackTrace(System.out);
              fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          }

          reader.close();
          if (VERBOSE) {
            System.out.println(" count is " + result);
          }

          if (done || result == END_COUNT) {
            break;
          }
        }

        if (VERBOSE) {
          System.out.println(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage +
                             "; max used = " + dir.getMaxUsedSizeInBytes());
        }

        if (done) {
          // Javadocs state that temp free Directory space
          // required is at most 2X total input size of
          // indices so let's make sure:
          assertTrue("max free Directory space required exceeded 2X the total input index sizes during " + methodName +
                     ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes vs limit=" + (2*(startDiskUsage + inputDiskUsage)) +
                     "; starting disk usage = " + startDiskUsage + " bytes; " +
                     "input index disk usage = " + inputDiskUsage + " bytes",
                     (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
        }

        // Make sure we don't hit disk full during close below:
        dir.setMaxSizeInBytes(0);
        dir.setRandomIOExceptionRate(0.0);
        dir.setRandomIOExceptionRateOnOpen(0.0);

        writer.close();
        dir.close();

        // Try again with more free space:
        diskFree += TEST_NIGHTLY ? TestUtil.nextInt(random(), 4000, 8000) : TestUtil.nextInt(random(), 40000, 80000);
      }
    }

    startDir.close();
    for (Directory dir : dirs) {
      dir.close();
    }
  }

  private static class FailTwiceDuringMerge extends MockDirectoryWrapper.Failure {
    public boolean didFail1;
    public boolean didFail2;

    @Override
    public void eval(MockDirectoryWrapper dir) throws IOException {
      if (!doFail) {
        return;
      }

      StackTraceElement[] trace = new Exception().getStackTrace();
      for (int i = 0; i < trace.length; i++) {
        if (SegmentMerger.class.getName().equals(trace[i].getClassName()) && "mergeTerms".equals(trace[i].getMethodName()) && !didFail1) {
          didFail1 = true;
          throw new IOException("fake disk full during mergeTerms");
        }
        if (LiveDocsFormat.class.getName().equals(trace[i].getClassName()) && "writeLiveDocs".equals(trace[i].getMethodName()) && !didFail2) {
          didFail2 = true;
          throw new IOException("fake disk full while writing LiveDocs");
        }
      }
    }
  }

  // LUCENE-2593
  public void testCorruptionAfterDiskFullDuringMerge() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriter w = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergeScheduler(new SerialMergeScheduler())
            .setReaderPooling(true)
            .setMergePolicy(newLogMergePolicy(2))
    );
    // we can do this because we add/delete/add (and don't merge to "nothing")
    w.setKeepFullyDeletedSegments(true);

    Document doc = new Document();
    doc.add(newTextField("f", "doctor who", Field.Store.NO));
    w.addDocument(doc);
    w.commit();

    w.deleteDocuments(new Term("f", "who"));
    w.addDocument(doc);

    // disk fills up!
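    // FailTwiceDuringMerge (defined above) watches the stack and throws a
    // fake disk-full IOException once from SegmentMerger.mergeTerms and once
    // from LiveDocsFormat.writeLiveDocs, so both merge-time write paths get
    // exercised by the commit below: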
    FailTwiceDuringMerge ftdm = new FailTwiceDuringMerge();
    ftdm.setDoFail();
    dir.failOn(ftdm);

    expectThrows(IOException.class, () -> {
      w.commit();
    });
    assertTrue(ftdm.didFail1 || ftdm.didFail2);

    TestUtil.checkIndex(dir);

    ftdm.clearDoFail();
    expectThrows(AlreadyClosedException.class, () -> {
      w.addDocument(doc);
    });

    dir.close();
  }

  // LUCENE-1130: make sure immediate disk full on creating
  // an IndexWriter (hit during DW.ThreadState.init()) is
  // OK:
  public void testImmediateDiskFull() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriter writer = new IndexWriter(dir,
                                         newIndexWriterConfig(new MockAnalyzer(random()))
                                           .setMaxBufferedDocs(2)
                                           .setMergeScheduler(new ConcurrentMergeScheduler())
                                           .setCommitOnClose(false));
    writer.commit(); // empty commit, to not create confusing situation with first commit
    dir.setMaxSizeInBytes(Math.max(1, dir.getRecomputedActualSizeInBytes()));
    final Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
    expectThrows(IOException.class, () -> {
      writer.addDocument(doc);
    });
    assertTrue(writer.deleter.isClosed());
    assertTrue(writer.isClosed());
    dir.close();
  }

  // TODO: these are also in TestIndexWriter... add a simple doc-writing method
  // like this to LuceneTestCase?
  private void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    doc.add(new NumericDocValuesField("numericdv", 1));
    doc.add(new IntPoint("point", 1));
    doc.add(new IntPoint("point2d", 1, 1));
    writer.addDocument(doc);
  }

  private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa " + index, Field.Store.NO));
    doc.add(newTextField("id", "" + index, Field.Store.NO));
    doc.add(new NumericDocValuesField("numericdv", 1));
    doc.add(new IntPoint("point", 1));
    doc.add(new IntPoint("point2d", 1, 1));
    writer.addDocument(doc);
  }
}