package org.apache.lucene.search;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.FieldCacheSanityChecker;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.apache.lucene.util.FieldCacheSanityChecker.InsanityType;
import java.util.Random;
import java.util.Arrays;
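
/**
 * Stress test for sorting: builds a random index, then runs every
 * combination of query, searcher (single-, few- and multi-segment),
 * queue size and sort, asserting that the current sorting
 * implementation returns exactly the same hits as the legacy one.
 */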
public class TestStressSort extends LuceneTestCase {
  private static final int NUM_DOCS = 5000;
// NOTE: put seed in here to make failures
// deterministic, but do not commit with a seed (to
// better test):
private Random r;
private Directory dir, dir2, dir3;
private IndexSearcher searcherMultiSegment;
private IndexSearcher searcherFewSegment;
private IndexSearcher searcherSingleSegment;
private static final boolean VERBOSE = false;
// min..max
private int nextInt(int min, int max) {
return min + r.nextInt(max-min+1);
}
// 0..(lim-1)
private int nextInt(int lim) {
return r.nextInt(lim);
}
final char[] buffer = new char[20];
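  // Returns a random string of the given length over chars 48..122
  // ('0'..'z'), i.e. digits, letters and a few punctuation chars: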
private String randomString(int size) {
    assert size <= buffer.length;
for(int i=0;i<size;i++) {
buffer[i] = (char) nextInt(48, 122);
}
return new String(buffer, 0, size);
}
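
  // Creates three versions of a random index: the original
  // multi-segment index (dir), a single-segment optimized copy (dir2)
  // and a copy optimized down to at most 3 segments (dir3), with a
  // searcher opened on each.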
private void create() throws Throwable {
dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
writer.setMaxBufferedDocs(17);
final Document doc = new Document();
final Document doc2 = new Document();
final Field id = new Field("id", "", Field.Store.YES, Field.Index.NO);
doc.add(id);
doc2.add(id);
final Field contents = new Field("contents", "", Field.Store.NO, Field.Index.ANALYZED);
doc.add(contents);
doc2.add(contents);
final Field byteField = new Field("byte", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(byteField);
doc2.add(byteField);
final Field shortField = new Field("short", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(shortField);
doc2.add(shortField);
final Field intField = new Field("int", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(intField);
doc2.add(intField);
final Field longField = new Field("long", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(longField);
doc2.add(longField);
final Field floatField = new Field("float", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(floatField);
doc2.add(floatField);
final Field doubleField = new Field("double", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(doubleField);
doc2.add(doubleField);
    // use two different string fields so our FieldCache usage
    // doesn't look suspicious to the cache sanity checker
final Field stringField = new Field("string", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(stringField);
final Field stringFieldIdx = new Field("stringIdx", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(stringFieldIdx);
// doc2 doesn't have stringField or stringFieldIdx, so we get nulls
for(int i=0;i<NUM_DOCS;i++) {
id.setValue(""+i);
if (i % 1000 == 0) {
contents.setValue("a b c z");
} else if (i % 100 == 0) {
contents.setValue("a b c y");
} else if (i % 10 == 0) {
contents.setValue("a b c x");
} else {
contents.setValue("a b c");
}
byteField.setValue(""+nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE));
if (nextInt(10) == 3) {
shortField.setValue(""+Short.MIN_VALUE);
} else if (nextInt(10) == 7) {
shortField.setValue(""+Short.MAX_VALUE);
} else {
shortField.setValue(""+nextInt(Short.MIN_VALUE, Short.MAX_VALUE));
}
if (nextInt(10) == 3) {
intField.setValue(""+Integer.MIN_VALUE);
} else if (nextInt(10) == 7) {
intField.setValue(""+Integer.MAX_VALUE);
} else {
intField.setValue(""+r.nextInt());
}
if (nextInt(10) == 3) {
longField.setValue(""+Long.MIN_VALUE);
} else if (nextInt(10) == 7) {
longField.setValue(""+Long.MAX_VALUE);
} else {
longField.setValue(""+r.nextLong());
}
floatField.setValue(""+r.nextFloat());
doubleField.setValue(""+r.nextDouble());
if (i % 197 == 0) {
writer.addDocument(doc2);
} else {
        String s = randomString(nextInt(20));
        stringField.setValue(s);
        stringFieldIdx.setValue(s);
writer.addDocument(doc);
}
}
writer.close();
searcherMultiSegment = new IndexSearcher(dir);
searcherMultiSegment.setDefaultFieldSortScoring(true, true);
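    // copy the index and optimize it down to a single segment: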
dir2 = new MockRAMDirectory(dir);
writer = new IndexWriter(dir2, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
writer.optimize();
writer.close();
searcherSingleSegment = new IndexSearcher(dir2);
searcherSingleSegment.setDefaultFieldSortScoring(true, true);
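    // copy the index and optimize it down to at most 3 segments: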
dir3 = new MockRAMDirectory(dir);
writer = new IndexWriter(dir3, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
writer.optimize(3);
writer.close();
searcherFewSegment = new IndexSearcher(dir3);
searcherFewSegment.setDefaultFieldSortScoring(true, true);
}
private void close() throws Throwable {
searcherMultiSegment.close();
searcherFewSegment.close();
searcherSingleSegment.close();
dir.close();
    dir2.close();
    dir3.close();
  }
public void testSort() throws Throwable {
r = newRandom();
    // Exercise every sort type, reversed and not, against both
    // restrictive and non-restrictive queries (on contents):
create();
Sort[] sorts = new Sort[50];
int sortCount = 0;
    for(int rev=0;rev<2;rev++) {
      Sort sort;
      boolean reverse = 1 == rev;
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("byte", SortField.BYTE, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("short", SortField.SHORT, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("int", SortField.INT, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("long", SortField.LONG, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("float", SortField.FLOAT, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("double", SortField.DOUBLE, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("string", SortField.STRING_VAL, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField("stringIdx", SortField.STRING, reverse)});
//sorts[sortCount++] = sort = new Sort();
//sort.setSort(new SortField[] {new SortField("string", SortField.STRING_ORD, reverse)});
//sorts[sortCount++] = sort = new Sort();
//sort.setSort(new SortField[] {new SortField("string", SortField.STRING_ORD_VAL, reverse)});
//sorts[sortCount++] = sort = new Sort();
//sort.setSort(new SortField[] {new SortField("string", SortField.STRING_ORD_VAL_DEM, reverse)});
//sorts[sortCount++] = sort = new Sort();
//sort.setSort(new SortField[] {new SortField("string", SortField.STRING_ORD_VAL_DEM2, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField(null, SortField.SCORE, reverse)});
sorts[sortCount++] = sort = new Sort();
sort.setSort(new SortField[] {new SortField(null, SortField.DOC, reverse)});
}
Query[] queries = new Query[4];
queries[0] = new MatchAllDocsQuery();
queries[1] = new TermQuery(new Term("contents", "x")); // matches every 10th doc
queries[2] = new TermQuery(new Term("contents", "y")); // matches every 100th doc
queries[3] = new TermQuery(new Term("contents", "z")); // matches every 1000th doc
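    // exhaustively run every (searcher, query, queue size, primary
    // sort, secondary sort) combination, comparing legacy vs new
    // results each time: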
for(int sx=0;sx<3;sx++) {
final IndexSearcher searcher;
if (sx == 0) {
searcher = searcherSingleSegment;
} else if (sx == 1) {
searcher = searcherFewSegment;
} else {
searcher = searcherMultiSegment;
}
for(int qx=0;qx<queries.length;qx++) {
final Query query = queries[qx];
for(int q=0;q<3;q++) {
final int queueSize;
if (q == 0) {
queueSize = 10;
} else if (q == 1) {
queueSize = 100;
} else {
queueSize = 1000;
}
for(int s=0;s<sortCount;s++) {
Sort sort1 = sorts[s];
for(int s2=-1;s2<sortCount;s2++) {
Sort sort;
if (s2 == -1) {
// Single field sort
sort = sort1;
} else {
sort = new Sort(new SortField[] {sort1.getSort()[0], sorts[s2].getSort()[0]});
}
              // Build the equivalent legacy sort for comparison:
              Sort oldSort = getOldSort(sort);
if (VERBOSE) {
System.out.println("query=" + query);
if (sx == 0) {
System.out.println(" single-segment index");
} else if (sx == 1) {
System.out.println(" few-segment index");
} else {
System.out.println(" many-segment index");
}
System.out.println(" numHit=" + queueSize);
System.out.println(" old=" + oldSort);
System.out.println(" new=" + sort);
}
TopDocs newHits = searcher.search(query, null, queueSize, sort);
TopDocs oldHits = searcher.search(query, null, queueSize, oldSort);
compare(oldHits, newHits);
}
}
}
}
}
// we explicitly test the old sort method and
// compare with the new, so we expect to see SUBREADER
// sanity checks fail.
    Insanity[] insanity = FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
try {
int ignored = 0;
for (int i = 0; i < insanity.length; i++) {
if (insanity[i].getType() == InsanityType.SUBREADER) {
insanity[i] = new Insanity(InsanityType.EXPECTED,
insanity[i].getMsg(),
insanity[i].getCacheEntries());
ignored++;
}
}
assertEquals("Not all insane field cache usage was expected",
ignored, insanity.length);
insanity = null;
} finally {
// report this in the event of any exception/failure
// if no failure, then insanity will be null
if (null != insanity) {
dumpArray(getTestLabel() + ": Insane FieldCache usage(s)", insanity, System.err);
}
}
// we've already checked FieldCache, purge so tearDown doesn't complain
    purgeFieldCache(FieldCache.DEFAULT);
close();
}
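
  /**
   * Builds the legacy equivalent of the given sort: STRING_VAL has no
   * legacy counterpart, so the "string" field falls back to
   * SortField.STRING, and every resulting SortField is flagged to use
   * the legacy search path.
   */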
private Sort getOldSort(Sort sort) {
SortField[] fields = sort.getSort();
SortField[] oldFields = new SortField[fields.length];
for(int i=0;i<fields.length;i++) {
int sortType;
if (fields[i].getField() != null && fields[i].getField().equals("string")) {
sortType = SortField.STRING;
} else {
sortType = fields[i].getType();
}
oldFields[i] = new SortField(fields[i].getField(),
sortType,
fields[i].getReverse());
oldFields[i].setUseLegacySearch(true);
}
return new Sort(oldFields);
}
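
  /**
   * Asserts that legacy and new results are identical: same totalHits,
   * same docs in the same order, scores equal within a small epsilon,
   * and identical sort field values for FieldDocs.
   */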
private void compare(TopDocs oldHits, TopDocs newHits) {
assertEquals(oldHits.totalHits, newHits.totalHits);
assertEquals(oldHits.scoreDocs.length, newHits.scoreDocs.length);
final ScoreDoc[] oldDocs = oldHits.scoreDocs;
final ScoreDoc[] newDocs = newHits.scoreDocs;
for(int i=0;i<oldDocs.length;i++) {
if (oldDocs[i] instanceof FieldDoc) {
        assertTrue("hit " + i + " should be a FieldDoc", newDocs[i] instanceof FieldDoc);
FieldDoc oldHit = (FieldDoc) oldDocs[i];
FieldDoc newHit = (FieldDoc) newDocs[i];
assertEquals("hit " + i + " of " + oldDocs.length + " differs: oldDoc=" + oldHit.doc + " vs newDoc=" + newHit.doc + " oldFields=" + _TestUtil.arrayToString(oldHit.fields) + " newFields=" + _TestUtil.arrayToString(newHit.fields),
oldHit.doc, newHit.doc);
assertEquals(oldHit.score, newHit.score, 0.00001);
assertTrue(Arrays.equals(oldHit.fields, newHit.fields));
} else {
ScoreDoc oldHit = oldDocs[i];
ScoreDoc newHit = newDocs[i];
assertEquals(oldHit.doc, newHit.doc);
assertEquals(oldHit.score, newHit.score, 0.00001);
}
}
}
}