package org.apache.lucene.index; /** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; import junit.framework.Assert; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util._TestUtil; public class TestStressIndexing2 extends LuceneTestCase { static int maxFields=4; static int bigFieldSize=10; static boolean sameFieldOrder=false; static int mergeFactor=3; static int maxBufferedDocs=3; static int seed=0; public class MockIndexWriter extends IndexWriter { public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException { super(dir, conf); } @Override boolean testPoint(String name) { // if (name.equals("startCommit")) { if (random.nextInt(4) == 2) Thread.yield(); return true; } } public void testRandomIWReader() throws Throwable { Directory dir = newDirectory(); // TODO: verify equals using IW.getReader DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir); IndexReader reader = dw.writer.getReader(); dw.writer.commit(); verifyEquals(random, reader, dir, "id"); reader.close(); dw.writer.close(); dir.close(); } public void testRandom() throws Throwable { Directory dir1 = newDirectory(); Directory dir2 = newDirectory(); // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1); int maxThreadStates = 1+random.nextInt(10); boolean doReaderPooling = random.nextBoolean(); Map<String,Document> docs = indexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling); indexSerial(random, docs, dir2); // verifying verify // verifyEquals(dir1, dir1, "id"); // verifyEquals(dir2, dir2, "id"); verifyEquals(dir1, dir2, "id"); dir1.close(); dir2.close(); } public void testMultiConfig() throws Throwable { // test lots of smaller different params together int num = atLeast(3); for (int i = 0; i < num; i++) { // increase iterations for better testing if (VERBOSE) { System.out.println("\n\nTEST: top iter=" + i); } sameFieldOrder=random.nextBoolean(); mergeFactor=random.nextInt(3)+2; maxBufferedDocs=random.nextInt(3)+2; int maxThreadStates = 1+random.nextInt(10); boolean doReaderPooling = random.nextBoolean(); seed++; int nThreads=random.nextInt(5)+1; int iter=random.nextInt(5)+1; int range=random.nextInt(20)+1; Directory dir1 = newDirectory(); Directory dir2 = newDirectory(); if (VERBOSE) { System.out.println(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor); } Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling); if (VERBOSE) { System.out.println("TEST: index serial"); } indexSerial(random, docs, dir2); if (VERBOSE) { System.out.println("TEST: verify"); } verifyEquals(dir1, dir2, "id"); dir1.close(); dir2.close(); } } static Term idTerm = new Term("id",""); IndexingThread[] threads; static Comparator<Fieldable> fieldNameComparator = new Comparator<Fieldable>() { public int compare(Fieldable o1, Fieldable o2) { return o1.name().compareTo(o2.name()); } }; // This test avoids using any extra synchronization in the multiple // indexing threads to test that IndexWriter does correctly synchronize // everything. public static class DocsAndWriter { Map<String,Document> docs; IndexWriter writer; } public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException { Map<String,Document> docs = new HashMap<String,Document>(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB( 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); w.commit(); setUseCompoundFile(w.getConfig().getMergePolicy(), false); setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor); /*** w.setMaxMergeDocs(Integer.MAX_VALUE); w.setMaxFieldLength(10000); w.setRAMBufferSizeMB(1); w.setMergeFactor(10); ***/ threads = new IndexingThread[nThreads]; for (int i=0; i<threads.length; i++) { IndexingThread th = new IndexingThread(); th.w = w; th.base = 1000000*i; th.range = range; th.iterations = iterations; threads[i] = th; } for (int i=0; i<threads.length; i++) { threads[i].start(); } for (int i=0; i<threads.length; i++) { threads[i].join(); } // w.optimize(); //w.close(); for (int i=0; i<threads.length; i++) { IndexingThread th = threads[i]; synchronized(th) { docs.putAll(th.docs); } } _TestUtil.checkIndex(dir); DocsAndWriter dw = new DocsAndWriter(); dw.docs = docs; dw.writer = w; return dw; } public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, boolean doReaderPooling) throws IOException, InterruptedException { Map<String,Document> docs = new HashMap<String,Document>(); for(int iter=0;iter<3;iter++) { if (VERBOSE) { System.out.println("TEST: iter=" + iter); } IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE) .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); setUseCompoundFile(w.getConfig().getMergePolicy(), false); setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor); threads = new IndexingThread[nThreads]; for (int i=0; i<threads.length; i++) { IndexingThread th = new IndexingThread(); th.w = w; th.base = 1000000*i; th.range = range; th.iterations = iterations; threads[i] = th; } for (int i=0; i<threads.length; i++) { threads[i].start(); } for (int i=0; i<threads.length; i++) { threads[i].join(); } // w.optimize(); w.close(); for (int i=0; i<threads.length; i++) { IndexingThread th = threads[i]; synchronized(th) { docs.putAll(th.docs); } } } _TestUtil.checkIndex(dir); return docs; } public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException { IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); // index all docs in a single thread Iterator<Document> iter = docs.values().iterator(); while (iter.hasNext()) { Document d = iter.next(); ArrayList<Fieldable> fields = new ArrayList<Fieldable>(); fields.addAll(d.getFields()); // put fields in same order each time Collections.sort(fields, fieldNameComparator); Document d1 = new Document(); d1.setBoost(d.getBoost()); for (int i=0; i<fields.size(); i++) { d1.add(fields.get(i)); } w.addDocument(d1); // System.out.println("indexing "+d1); } w.close(); } public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable { IndexReader r2 = IndexReader.open(dir2); verifyEquals(r1, r2, idField); r2.close(); } public static void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable { IndexReader r1 = IndexReader.open(dir1, true); IndexReader r2 = IndexReader.open(dir2, true); verifyEquals(r1, r2, idField); r1.close(); r2.close(); } private static void printDocs(IndexReader r) throws Throwable { IndexReader[] subs = r.getSequentialSubReaders(); for(IndexReader sub : subs) { System.out.println(" " + ((SegmentReader) sub).getSegmentInfo()); for(int docID=0;docID<sub.maxDoc();docID++) { Document doc = sub.document(docID); if (!sub.isDeleted(docID)) { System.out.println(" docID=" + docID + " id:" + doc.get("id")); } else { System.out.println(" DEL docID=" + docID + " id:" + doc.get("id")); } } } } public static void verifyEquals(IndexReader r1, IndexReader r2, String idField) throws Throwable { if (VERBOSE) { System.out.println("\nr1 docs:"); printDocs(r1); System.out.println("\nr2 docs:"); printDocs(r2); } if (r1.numDocs() != r2.numDocs()) { assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs(); } boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc()); int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping TermDocs termDocs1 = r1.termDocs(); TermDocs termDocs2 = r2.termDocs(); // create mapping from id2 space to id2 based on idField idField = StringHelper.intern(idField); TermEnum termEnum = r1.terms (new Term (idField, "")); do { Term term = termEnum.term(); if (term==null || term.field() != idField) break; termDocs1.seek (termEnum); if (!termDocs1.next()) { // This doc is deleted and wasn't replaced termDocs2.seek(termEnum); assertFalse(termDocs2.next()); continue; } int id1 = termDocs1.doc(); assertFalse(termDocs1.next()); termDocs2.seek(termEnum); assertTrue(termDocs2.next()); int id2 = termDocs2.doc(); assertFalse(termDocs2.next()); r2r1[id2] = id1; // verify stored fields are equivalent try { verifyEquals(r1.document(id1), r2.document(id2)); } catch (Throwable t) { System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term); System.out.println(" d1=" + r1.document(id1)); System.out.println(" d2=" + r2.document(id2)); throw t; } try { // verify term vectors are equivalent verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2)); } catch (Throwable e) { System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); TermFreqVector[] tv1 = r1.getTermFreqVectors(id1); System.out.println(" d1=" + tv1); if (tv1 != null) for(int i=0;i<tv1.length;i++) System.out.println(" " + i + ": " + tv1[i]); TermFreqVector[] tv2 = r2.getTermFreqVectors(id2); System.out.println(" d2=" + tv2); if (tv2 != null) for(int i=0;i<tv2.length;i++) System.out.println(" " + i + ": " + tv2[i]); throw e; } } while (termEnum.next()); termEnum.close(); // Verify postings TermEnum termEnum1 = r1.terms (new Term ("", "")); TermEnum termEnum2 = r2.terms (new Term ("", "")); // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.numDocs()]; long[] info2 = new long[r2.numDocs()]; for(;;) { Term term1,term2; // iterate until we get some docs int len1; for(;;) { len1=0; term1 = termEnum1.term(); if (term1==null) break; termDocs1.seek(termEnum1); while (termDocs1.next()) { int d1 = termDocs1.doc(); int f1 = termDocs1.freq(); info1[len1] = (((long)d1)<<32) | f1; len1++; } if (len1>0) break; if (!termEnum1.next()) break; } // iterate until we get some docs int len2; for(;;) { len2=0; term2 = termEnum2.term(); if (term2==null) break; termDocs2.seek(termEnum2); while (termDocs2.next()) { int d2 = termDocs2.doc(); int f2 = termDocs2.freq(); info2[len2] = (((long)r2r1[d2])<<32) | f2; len2++; } if (len2>0) break; if (!termEnum2.next()) break; } if (!hasDeletes) assertEquals(termEnum1.docFreq(), termEnum2.docFreq()); assertEquals(len1, len2); if (len1==0) break; // no more terms assertEquals(term1, term2); // sort info2 to get it into ascending docid Arrays.sort(info2, 0, len2); // now compare for (int i=0; i<len1; i++) { assertEquals(info1[i], info2[i]); } termEnum1.next(); termEnum2.next(); } } public static void verifyEquals(Document d1, Document d2) { List<Fieldable> ff1 = d1.getFields(); List<Fieldable> ff2 = d2.getFields(); Collections.sort(ff1, fieldNameComparator); Collections.sort(ff2, fieldNameComparator); assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size()); for (int i=0; i<ff1.size(); i++) { Fieldable f1 = ff1.get(i); Fieldable f2 = ff2.get(i); if (f1.isBinary()) { assert(f2.isBinary()); } else { String s1 = f1.stringValue(); String s2 = f2.stringValue(); assertEquals(ff1 + " : " + ff2, s1,s2); } } } public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) { if (d1 == null) { assertTrue(d2 == null); return; } assertTrue(d2 != null); assertEquals(d1.length, d2.length); for(int i=0;i<d1.length;i++) { TermFreqVector v1 = d1[i]; TermFreqVector v2 = d2[i]; if (v1 == null || v2 == null) System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length); assertEquals(v1.size(), v2.size()); int numTerms = v1.size(); String[] terms1 = v1.getTerms(); String[] terms2 = v2.getTerms(); int[] freq1 = v1.getTermFrequencies(); int[] freq2 = v2.getTermFrequencies(); for(int j=0;j<numTerms;j++) { if (!terms1[j].equals(terms2[j])) assertEquals(terms1[j], terms2[j]); assertEquals(freq1[j], freq2[j]); } if (v1 instanceof TermPositionVector) { assertTrue(v2 instanceof TermPositionVector); TermPositionVector tpv1 = (TermPositionVector) v1; TermPositionVector tpv2 = (TermPositionVector) v2; for(int j=0;j<numTerms;j++) { int[] pos1 = tpv1.getTermPositions(j); int[] pos2 = tpv2.getTermPositions(j); if (pos1 == null) { assertNull(pos2); } else { assertNotNull(pos1); assertNotNull(pos2); assertEquals(pos1.length, pos2.length); TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j); TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j); if (offsets1 == null) assertTrue(offsets2 == null); else assertTrue(offsets2 != null); for(int k=0;k<pos1.length;k++) { assertEquals(pos1[k], pos2[k]); if (offsets1 != null) { assertEquals(offsets1[k].getStartOffset(), offsets2[k].getStartOffset()); assertEquals(offsets1[k].getEndOffset(), offsets2[k].getEndOffset()); } } } } } } } private class IndexingThread extends Thread { IndexWriter w; int base; int range; int iterations; Map<String,Document> docs = new HashMap<String,Document>(); Random r; public int nextInt(int lim) { return r.nextInt(lim); } // start is inclusive and end is exclusive public int nextInt(int start, int end) { return start + r.nextInt(end-start); } char[] buffer = new char[100]; private int addUTF8Token(int start) { final int end = start + nextInt(20); if (buffer.length < 1+end) { char[] newBuffer = new char[(int) ((1+end)*1.25)]; System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); buffer = newBuffer; } for(int i=start;i<end;i++) { int t = nextInt(6); if (0 == t && i < end-1) { // Make a surrogate pair // High surrogate buffer[i++] = (char) nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (char) nextInt(0xdc00, 0xe000); } else if (t <= 1) buffer[i] = (char) nextInt(0x80); else if (2 == t) buffer[i] = (char) nextInt(0x80, 0x800); else if (3 == t) buffer[i] = (char) nextInt(0x800, 0xd800); else if (4 == t) buffer[i] = (char) nextInt(0xe000, 0xffff); else if (5 == t) { // Illegal unpaired surrogate if (r.nextBoolean()) buffer[i] = (char) nextInt(0xd800, 0xdc00); else buffer[i] = (char) nextInt(0xdc00, 0xe000); } } buffer[end] = ' '; return 1+end; } public String getString(int nTokens) { nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1; // Half the time make a random UTF8 string if (r.nextBoolean()) return getUTF8String(nTokens); // avoid StringBuffer because it adds extra synchronization. char[] arr = new char[nTokens*2]; for (int i=0; i<nTokens; i++) { arr[i*2] = (char)('A' + r.nextInt(10)); arr[i*2+1] = ' '; } return new String(arr); } public String getUTF8String(int nTokens) { int upto = 0; Arrays.fill(buffer, (char) 0); for(int i=0;i<nTokens;i++) upto = addUTF8Token(upto); return new String(buffer, 0, upto); } public String getIdString() { return Integer.toString(base + nextInt(range)); } public void indexDoc() throws IOException { Document d = new Document(); ArrayList<Field> fields = new ArrayList<Field>(); String idString = getIdString(); Field idField = newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); fields.add(idField); int nFields = nextInt(maxFields); for (int i=0; i<nFields; i++) { Field.TermVector tvVal = Field.TermVector.NO; switch (nextInt(4)) { case 0: tvVal = Field.TermVector.NO; break; case 1: tvVal = Field.TermVector.YES; break; case 2: tvVal = Field.TermVector.WITH_POSITIONS; break; case 3: tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS; break; } switch (nextInt(4)) { case 0: fields.add(newField("f" + nextInt(100), getString(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, tvVal)); break; case 1: fields.add(newField("f" + nextInt(100), getString(0), Field.Store.NO, Field.Index.ANALYZED, tvVal)); break; case 2: fields.add(newField("f" + nextInt(100), getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); break; case 3: fields.add(newField("f" + nextInt(100), getString(bigFieldSize), Field.Store.YES, Field.Index.ANALYZED, tvVal)); break; } } if (sameFieldOrder) { Collections.sort(fields, fieldNameComparator); } else { // random placement of id field also Collections.swap(fields,nextInt(fields.size()), 0); } for (int i=0; i<fields.size(); i++) { d.add(fields.get(i)); } if (VERBOSE) { System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString); } w.updateDocument(idTerm.createTerm(idString), d); //System.out.println(Thread.currentThread().getName() + ": indexing "+d); docs.put(idString, d); } public void deleteDoc() throws IOException { String idString = getIdString(); if (VERBOSE) { System.out.println(Thread.currentThread().getName() + ": del id:" + idString); } w.deleteDocuments(idTerm.createTerm(idString)); docs.remove(idString); } public void deleteByQuery() throws IOException { String idString = getIdString(); if (VERBOSE) { System.out.println(Thread.currentThread().getName() + ": del query id:" + idString); } w.deleteDocuments(new TermQuery(idTerm.createTerm(idString))); docs.remove(idString); } @Override public void run() { try { r = new Random(base+range+seed); for (int i=0; i<iterations; i++) { int what = nextInt(100); if (what < 5) { deleteDoc(); } else if (what < 10) { deleteByQuery(); } else { indexDoc(); } } } catch (Throwable e) { e.printStackTrace(); Assert.fail(e.toString()); } synchronized (this) { docs.size(); } } } }