package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util._TestUtil;
import org.junit.BeforeClass;

/**
 * Unit tests for sorting code.
 *
 * <p>Created: Feb 17, 2004 4:55:10 PM
 *
 * @since lucene 1.4
 */
public class TestSort extends LuceneTestCase {

  // true if our codec supports docvalues: true unless codec is preflex (3.x)
  boolean supportsDocValues = Codec.getDefault().getName().equals("Lucene3x") == false;

  private static int NUM_STRINGS;
  private IndexSearcher full;
  private IndexSearcher searchX;
  private IndexSearcher searchY;
  private Query queryX;
  private Query queryY;
  private Query queryA;
  private Query queryE;
  private Query queryF;
  private Query queryG;
  private Query queryM;
  private Sort sort;

  @BeforeClass
  public static void beforeClass() {
    NUM_STRINGS = atLeast(500);
  }

  // document data:
  // the tracer field is used to determine which document was hit
  // the contents field is used to search and sort by relevance
  // the int field to sort by int
  // the float field to sort by float
  // the string field to sort by string
  // the i18n field includes accented characters for testing locale-specific sorting
  private String[][] data = new String[][] {
    // tracer  contents  int  float  string  custom  i18n  long  double, short, byte, 'custom parser encoding'
    { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126", "J"},//A, x
    { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I"},//B, y
    { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999","40.00002343", "125", "15", "H"},//C, x
    { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", String.valueOf(Long.MAX_VALUE),String.valueOf(Double.MIN_VALUE), String.valueOf(Short.MIN_VALUE), String.valueOf(Byte.MIN_VALUE), "G"},//D, y
    { "E", "x a b c d", "5", "2f", "h", "B-8", "peach", String.valueOf(Long.MIN_VALUE),String.valueOf(Double.MAX_VALUE), String.valueOf(Short.MAX_VALUE), String.valueOf(Byte.MAX_VALUE), "F"},//E,x
    { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0", "E"},//F,y
    { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D"},//G,x
    { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50", "C"},//H,y
    { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51", "B"},//I,x
    { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A"},//J,y
    { "W", "g", "1", null, null, null, null, null, null, null, null, null},
    { "X", "g", "1", "0.1", null, null, null, null, null, null, null, null},
    { "Y", "g", "1", "0.2", null, null, null, null, null, null, null, null},
    { "Z", "f g", null, null, null, null, null, null, null, null, null, null},

    // Sort Missing first/last
    { "a", "m", null, null, null, null, null, null, null, null, null, null},
    { "b", "m", "4", "4.0", "4", null, null, "4", "4", "4", "4", null},
    { "c", "m", "5", "5.0", "5", null, null, "5", "5", "5", "5", null},
    { "d", "m", null, null, null, null, null, null, null, null, null, null}
  };

  // create an index of all the documents, or just the x, or just the y documents
  private IndexSearcher getIndex (boolean even, boolean odd) throws IOException {
    Directory indexStore = newDirectory();
    dirs.add(indexStore);
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

    final DocValues.Type stringDVType;
    if (dvStringSorted) {
      // Index sorted
      stringDVType = random().nextBoolean() ?
DocValues.Type.BYTES_VAR_SORTED : DocValues.Type.BYTES_FIXED_SORTED; } else { // Index non-sorted if (random().nextBoolean()) { // Fixed stringDVType = random().nextBoolean() ? DocValues.Type.BYTES_FIXED_STRAIGHT : DocValues.Type.BYTES_FIXED_DEREF; } else { // Var stringDVType = random().nextBoolean() ? DocValues.Type.BYTES_VAR_STRAIGHT : DocValues.Type.BYTES_VAR_DEREF; } } FieldType ft1 = new FieldType(); ft1.setStored(true); FieldType ft2 = new FieldType(); ft2.setIndexed(true); for (int i=0; i<data.length; ++i) { if (((i%2)==0 && even) || ((i%2)==1 && odd)) { Document doc = new Document(); doc.add (new Field ("tracer", data[i][0], ft1)); doc.add (new TextField ("contents", data[i][1], Field.Store.NO)); if (data[i][2] != null) { doc.add(new StringField ("int", data[i][2], Field.Store.NO)); if (supportsDocValues) { doc.add(new PackedLongDocValuesField("int", Integer.parseInt(data[i][2]))); } } if (data[i][3] != null) { doc.add(new StringField ("float", data[i][3], Field.Store.NO)); if (supportsDocValues) { doc.add(new FloatDocValuesField("float", Float.parseFloat(data[i][3]))); } } if (data[i][4] != null) { doc.add(new StringField ("string", data[i][4], Field.Store.NO)); if (supportsDocValues) { switch(stringDVType) { case BYTES_FIXED_SORTED: doc.add(new SortedBytesDocValuesField("string", new BytesRef(data[i][4]), true)); break; case BYTES_VAR_SORTED: doc.add(new SortedBytesDocValuesField("string", new BytesRef(data[i][4]), false)); break; case BYTES_FIXED_STRAIGHT: doc.add(new StraightBytesDocValuesField("string", new BytesRef(data[i][4]), true)); break; case BYTES_VAR_STRAIGHT: doc.add(new StraightBytesDocValuesField("string", new BytesRef(data[i][4]), false)); break; case BYTES_FIXED_DEREF: doc.add(new DerefBytesDocValuesField("string", new BytesRef(data[i][4]), true)); break; case BYTES_VAR_DEREF: doc.add(new DerefBytesDocValuesField("string", new BytesRef(data[i][4]), false)); break; default: throw new IllegalStateException("unknown type " + stringDVType); } } } if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5], Field.Store.NO)); if (data[i][6] != null) doc.add (new StringField ("i18n", data[i][6], Field.Store.NO)); if (data[i][7] != null) doc.add (new StringField ("long", data[i][7], Field.Store.NO)); if (data[i][8] != null) { doc.add(new StringField ("double", data[i][8], Field.Store.NO)); if (supportsDocValues) { doc.add(new DoubleDocValuesField("double", Double.parseDouble(data[i][8]))); } } if (data[i][9] != null) doc.add (new StringField ("short", data[i][9], Field.Store.NO)); if (data[i][10] != null) doc.add (new StringField ("byte", data[i][10], Field.Store.NO)); if (data[i][11] != null) doc.add (new StringField ("parser", data[i][11], Field.Store.NO)); for(IndexableField f : doc.getFields()) { if (f.fieldType().indexed() && !f.fieldType().omitNorms()) { ((Field) f).setBoost(2.0f); } } writer.addDocument (doc); } } IndexReader reader = writer.getReader(); writer.close (); IndexSearcher s = newSearcher(reader); return s; } private IndexSearcher getFullIndex() throws IOException { return getIndex (true, true); } private IndexSearcher getFullStrings() throws IOException { Directory indexStore = newDirectory(); dirs.add(indexStore); IndexWriter writer = new IndexWriter( indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())). 
setMergePolicy(newLogMergePolicy(97)) ); FieldType onlyStored = new FieldType(); onlyStored.setStored(true); final int fixedLen = getRandomNumber(2, 8); final int fixedLen2 = getRandomNumber(1, 4); for (int i=0; i<NUM_STRINGS; i++) { Document doc = new Document(); String num = getRandomCharString(getRandomNumber(2, 8), 48, 52); doc.add (new Field ("tracer", num, onlyStored)); //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); doc.add(new StringField("string", num, Field.Store.NO)); if (supportsDocValues) { doc.add(new SortedBytesDocValuesField("string", new BytesRef(num))); } String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50); doc.add(new StringField ("string2", num2, Field.Store.NO)); if (supportsDocValues) { doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2))); } doc.add (new Field ("tracer2", num2, onlyStored)); for(IndexableField f2 : doc.getFields()) { if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) { ((Field) f2).setBoost(2.0f); } } String numFixed = getRandomCharString(fixedLen, 48, 52); doc.add (new Field ("fixed_tracer", numFixed, onlyStored)); //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); doc.add(new StringField("string_fixed", numFixed, Field.Store.NO)); if (supportsDocValues) { doc.add(new SortedBytesDocValuesField("string_fixed", new BytesRef(numFixed), true)); } String num2Fixed = getRandomCharString(fixedLen2, 48, 52); doc.add(new StringField ("string2_fixed", num2Fixed, Field.Store.NO)); if (supportsDocValues) { doc.add(new SortedBytesDocValuesField("string2_fixed", new BytesRef(num2Fixed), true)); } doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored)); for(IndexableField f2 : doc.getFields()) { if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) { ((Field) f2).setBoost(2.0f); } } writer.addDocument (doc); } //writer.forceMerge(1); //System.out.println(writer.getSegmentCount()); writer.close(); IndexReader reader = DirectoryReader.open(indexStore); return newSearcher(reader); } public String getRandomNumberString(int num, int low, int high) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < num; i++) { sb.append(getRandomNumber(low, high)); } return sb.toString(); } public String getRandomCharString(int num) { return getRandomCharString(num, 48, 122); } public String getRandomCharString(int num, int start, int end) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < num; i++) { sb.append(new Character((char) getRandomNumber(start, end))); } return sb.toString(); } public int getRandomNumber(final int low, final int high) { int randInt = (Math.abs(random().nextInt()) % (high - low)) + low; return randInt; } private IndexSearcher getXIndex() throws IOException { return getIndex (true, false); } private IndexSearcher getYIndex() throws IOException { return getIndex (false, true); } private IndexSearcher getEmptyIndex() throws IOException { return getIndex (false, false); } // Set to true if the DV "string" field is indexed as a // sorted source: private boolean dvStringSorted; @Override public void setUp() throws Exception { super.setUp(); dvStringSorted = random().nextBoolean(); full = getFullIndex(); searchX = getXIndex(); searchY = getYIndex(); queryX = new TermQuery (new Term ("contents", "x")); queryY = new TermQuery (new Term ("contents", "y")); queryA = new TermQuery (new Term ("contents", "a")); queryE = new TermQuery (new Term ("contents", "e")); queryF = new TermQuery (new Term ("contents", 
"f")); queryG = new TermQuery (new Term ("contents", "g")); queryM = new TermQuery (new Term ("contents", "m")); sort = new Sort(); } private ArrayList<Directory> dirs = new ArrayList<Directory>(); @Override public void tearDown() throws Exception { full.reader.close(); searchX.reader.close(); searchY.reader.close(); for (Directory dir : dirs) dir.close(); super.tearDown(); } // test the sorts by score and document number public void testBuiltInSorts() throws Exception { sort = new Sort(); assertMatches (full, queryX, sort, "ACEGI"); assertMatches (full, queryY, sort, "BDFHJ"); sort.setSort(SortField.FIELD_DOC); assertMatches (full, queryX, sort, "ACEGI"); assertMatches (full, queryY, sort, "BDFHJ"); } private static SortField useDocValues(SortField field) { field.setUseIndexValues(true); return field; } // test sorts where the type of field is specified public void testTypedSort() throws Exception { sort.setSort (new SortField ("int", SortField.Type.INT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IGAEC"); assertMatches (full, queryY, sort, "DHFJB"); sort.setSort (new SortField ("float", SortField.Type.FLOAT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "GCIEA"); assertMatches (full, queryY, sort, "DHJFB"); sort.setSort (new SortField ("long", SortField.Type.LONG), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "EACGI"); assertMatches (full, queryY, sort, "FBJHD"); sort.setSort (new SortField ("double", SortField.Type.DOUBLE), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AGICE"); assertMatches (full, queryY, sort, "DJHBF"); sort.setSort (new SortField ("byte", SortField.Type.BYTE), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "CIGAE"); assertMatches (full, queryY, sort, "DHFBJ"); sort.setSort (new SortField ("short", SortField.Type.SHORT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IAGCE"); assertMatches (full, queryY, sort, "DFHBJ"); sort.setSort (new SortField ("string", SortField.Type.STRING), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AIGEC"); assertMatches (full, queryY, sort, "DJHFB"); if (supportsDocValues) { sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IGAEC"); assertMatches (full, queryY, sort, "DHFJB"); sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "GCIEA"); assertMatches (full, queryY, sort, "DHJFB"); sort.setSort (useDocValues(new SortField ("double", SortField.Type.DOUBLE)), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AGICE"); assertMatches (full, queryY, sort, "DJHBF"); sort.setSort (useDocValues(new SortField ("string", getDVStringSortType())), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AIGEC"); assertMatches (full, queryY, sort, "DJHFB"); } } private SortField.Type getDVStringSortType() { if (dvStringSorted) { // If you index as sorted source you can still sort by // value instead: return random().nextBoolean() ? 
SortField.Type.STRING : SortField.Type.STRING_VAL; } else { return SortField.Type.STRING_VAL; } } private static class SortMissingLastTestHelper { final SortField sortField; final Object min; final Object max; SortMissingLastTestHelper(SortField sortField, Object min, Object max) { this.sortField = sortField; this.min = min; this.max = max; } } // test sorts where the type of field is specified public void testSortMissingLast() throws Exception { @SuppressWarnings("boxing") SortMissingLastTestHelper[] ascendTesters = new SortMissingLastTestHelper[] { new SortMissingLastTestHelper( new SortField( "byte", SortField.Type.BYTE ), Byte.MIN_VALUE, Byte.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "short", SortField.Type.SHORT ), Short.MIN_VALUE, Short.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "int", SortField.Type.INT ), Integer.MIN_VALUE, Integer.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "long", SortField.Type.LONG ), Long.MIN_VALUE, Long.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "float", SortField.Type.FLOAT ), Float.MIN_VALUE, Float.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "double", SortField.Type.DOUBLE ), Double.MIN_VALUE, Double.MAX_VALUE ), }; @SuppressWarnings("boxing") SortMissingLastTestHelper[] descendTesters = new SortMissingLastTestHelper[] { new SortMissingLastTestHelper( new SortField( "byte", SortField.Type.BYTE, true ), Byte.MIN_VALUE, Byte.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "short", SortField.Type.SHORT, true ), Short.MIN_VALUE, Short.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "int", SortField.Type.INT, true ), Integer.MIN_VALUE, Integer.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "long", SortField.Type.LONG, true ), Long.MIN_VALUE, Long.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "float", SortField.Type.FLOAT, true ), Float.MIN_VALUE, Float.MAX_VALUE ), new SortMissingLastTestHelper( new SortField( "double", SortField.Type.DOUBLE, true ), Double.MIN_VALUE, Double.MAX_VALUE ), }; // Default order: ascending for(SortMissingLastTestHelper t : ascendTesters) { sort.setSort(t.sortField, SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "adbc"); sort.setSort(t.sortField.setMissingValue(t.max), SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "bcad"); sort.setSort(t.sortField.setMissingValue(t.min), SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "adbc"); } // Reverse order: descending (Note: Order for un-valued documents remains the same due to tie breaker: a,d) for(SortMissingLastTestHelper t : descendTesters) { sort.setSort(t.sortField, SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "cbad"); sort.setSort(t.sortField.setMissingValue( t.max ), SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "adcb"); sort.setSort(t.sortField.setMissingValue( t.min ), SortField.FIELD_DOC); assertMatches("sortField:"+t.sortField, full, queryM, sort, "cbad"); } } /** * Test String sorting: small queue to many matches, multi field sort, reverse sort */ public void testStringSort() throws Exception { // Normal string field, var length sort.setSort( new SortField("string", SortField.Type.STRING), new SortField("string2", SortField.Type.STRING, true), SortField.FIELD_DOC); verifyStringSort(sort); // Normal string field, fixed length sort.setSort( new SortField("string_fixed", 
SortField.Type.STRING), new SortField("string2_fixed", SortField.Type.STRING, true), SortField.FIELD_DOC); verifyStringSort(sort); // Doc values field, var length assumeFalse("cannot work with preflex codec", "Lucene3x".equals(Codec.getDefault().getName())); sort.setSort( useDocValues(new SortField("string", getDVStringSortType())), useDocValues(new SortField("string2", getDVStringSortType(), true)), SortField.FIELD_DOC); verifyStringSort(sort); // Doc values field, fixed length sort.setSort( useDocValues(new SortField("string_fixed", getDVStringSortType())), useDocValues(new SortField("string2_fixed", getDVStringSortType(), true)), SortField.FIELD_DOC); verifyStringSort(sort); } private void verifyStringSort(Sort sort) throws Exception { final IndexSearcher searcher = getFullStrings(); final ScoreDoc[] result = searcher.search(new MatchAllDocsQuery(), null, _TestUtil.nextInt(random(), 500, searcher.getIndexReader().maxDoc()), sort).scoreDocs; StringBuilder buff = new StringBuilder(); int n = result.length; String last = null; String lastSub = null; int lastDocId = 0; boolean fail = false; final String fieldSuffix = sort.getSort()[0].getField().endsWith("_fixed") ? "_fixed" : ""; for (int x = 0; x < n; ++x) { Document doc2 = searcher.doc(result[x].doc); IndexableField[] v = doc2.getFields("tracer" + fieldSuffix); IndexableField[] v2 = doc2.getFields("tracer2" + fieldSuffix); for (int j = 0; j < v.length; ++j) { buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+")\n"); if (last != null) { int cmp = v[j].stringValue().compareTo(last); if (!(cmp >= 0)) { // ensure first field is in order fail = true; System.out.println("fail:" + v[j] + " < " + last); buff.append(" WRONG tracer\n"); } if (cmp == 0) { // ensure second field is in reverse order cmp = v2[j].stringValue().compareTo(lastSub); if (cmp > 0) { fail = true; System.out.println("rev field fail:" + v2[j] + " > " + lastSub); buff.append(" WRONG tracer2\n"); } else if(cmp == 0) { // ensure docid is in order if (result[x].doc < lastDocId) { fail = true; System.out.println("doc fail:" + result[x].doc + " > " + lastDocId); buff.append(" WRONG docID\n"); } } } } last = v[j].stringValue(); lastSub = v2[j].stringValue(); lastDocId = result[x].doc; } } if (fail) { System.out.println("topn field1(field2)(docID):\n" + buff); } assertFalse("Found sort results out of order", fail); searcher.getIndexReader().close(); } /** * test sorts where the type of field is specified and a custom field parser * is used, that uses a simple char encoding. The sorted string contains a * character beginning from 'A' that is mapped to a numeric value using some * "funny" algorithm to be different for each data type. 
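 *
 * <p>For instance, the IntParser below maps the term "A" to 0 and the term "J" to
 * 9 * 123456 = 1111104, so an ascending sort on the "parser" field walks the documents in
 * reverse tracer order, which is why every assertMatches call in this test expects "JIHGFEDCBA".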
 */
  public void testCustomFieldParserSort() throws Exception {
    // since this test explicitly uses different parsers on the same field name,
    // we explicitly check/purge the FieldCache between each assertMatches
    FieldCache fc = FieldCache.DEFAULT;

    sort.setSort (new SortField ("parser", new FieldCache.IntParser(){
      public final int parseInt(final BytesRef term) {
        return (term.bytes[term.offset]-'A') * 123456;
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " IntParser");
    fc.purgeAllCaches();

    sort.setSort (new SortField ("parser", new FieldCache.FloatParser(){
      public final float parseFloat(final BytesRef term) {
        return (float) Math.sqrt( term.bytes[term.offset] );
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " FloatParser");
    fc.purgeAllCaches();

    sort.setSort (new SortField ("parser", new FieldCache.LongParser(){
      public final long parseLong(final BytesRef term) {
        return (term.bytes[term.offset]-'A') * 1234567890L;
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " LongParser");
    fc.purgeAllCaches();

    sort.setSort (new SortField ("parser", new FieldCache.DoubleParser(){
      public final double parseDouble(final BytesRef term) {
        return Math.pow( term.bytes[term.offset], (term.bytes[term.offset]-'A') );
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " DoubleParser");
    fc.purgeAllCaches();

    sort.setSort (new SortField ("parser", new FieldCache.ByteParser(){
      public final byte parseByte(final BytesRef term) {
        return (byte) (term.bytes[term.offset]-'A');
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " ByteParser");
    fc.purgeAllCaches();

    sort.setSort (new SortField ("parser", new FieldCache.ShortParser(){
      public final short parseShort(final BytesRef term) {
        return (short) (term.bytes[term.offset]-'A');
      }
    }), SortField.FIELD_DOC );
    assertMatches (full, queryA, sort, "JIHGFEDCBA");
    assertSaneFieldCaches(getTestName() + " ShortParser");
    fc.purgeAllCaches();
  }

  // test sorts when there's nothing in the index
  public void testEmptyIndex() throws Exception {
    IndexSearcher empty = getEmptyIndex();

    sort = new Sort();
    assertMatches (empty, queryX, sort, "");

    sort.setSort(SortField.FIELD_DOC);
    assertMatches (empty, queryX, sort, "");

    sort.setSort (new SortField ("int", SortField.Type.INT), SortField.FIELD_DOC );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (new SortField ("string", SortField.Type.STRING, true), SortField.FIELD_DOC );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), new SortField ("string", SortField.Type.STRING) );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)), SortField.FIELD_DOC );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType())) );
    assertMatches (empty, queryX, sort, "");

    sort.setSort (useDocValues(new
SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType())) ); assertMatches (empty, queryX, sort, ""); } static class MyFieldComparator extends FieldComparator<Integer> { int[] docValues; int[] slotValues; int bottomValue; MyFieldComparator(int numHits) { slotValues = new int[numHits]; } @Override public void copy(int slot, int doc) { slotValues[slot] = docValues[doc]; } @Override public int compare(int slot1, int slot2) { // values are small enough that overflow won't happen return slotValues[slot1] - slotValues[slot2]; } @Override public int compareBottom(int doc) { return bottomValue - docValues[doc]; } @Override public void setBottom(int bottom) { bottomValue = slotValues[bottom]; } private static final FieldCache.IntParser testIntParser = new FieldCache.IntParser() { public final int parseInt(final BytesRef term) { return (term.bytes[term.offset]-'A') * 123456; } }; @Override public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException { docValues = FieldCache.DEFAULT.getInts(context.reader(), "parser", testIntParser, false); return this; } @Override public Integer value(int slot) { return Integer.valueOf(slotValues[slot]); } @Override public int compareDocToValue(int doc, Integer valueObj) { final int value = valueObj.intValue(); final int docValue = docValues[doc]; // values are small enough that overflow won't happen return docValue - value; } } static class MyFieldComparatorSource extends FieldComparatorSource { @Override public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) { return new MyFieldComparator(numHits); } } // Test sorting w/ custom FieldComparator public void testNewCustomFieldParserSort() throws Exception { sort.setSort (new SortField ("parser", new MyFieldComparatorSource())); assertMatches (full, queryA, sort, "JIHGFEDCBA"); } // test sorts in reverse public void testReverseSort() throws Exception { sort.setSort (new SortField (null, SortField.Type.SCORE, true), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IEGCA"); assertMatches (full, queryY, sort, "JFHDB"); sort.setSort (new SortField (null, SortField.Type.DOC, true)); assertMatches (full, queryX, sort, "IGECA"); assertMatches (full, queryY, sort, "JHFDB"); sort.setSort (new SortField ("int", SortField.Type.INT, true) ); assertMatches (full, queryX, sort, "CAEGI"); assertMatches (full, queryY, sort, "BJFHD"); sort.setSort (new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (full, queryX, sort, "AECIG"); assertMatches (full, queryY, sort, "BFJHD"); sort.setSort (new SortField ("string", SortField.Type.STRING, true) ); assertMatches (full, queryX, sort, "CEGIA"); assertMatches (full, queryY, sort, "BFHJD"); if (supportsDocValues) { sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)) ); assertMatches (full, queryX, sort, "CAEGI"); assertMatches (full, queryY, sort, "BJFHD"); sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) ); assertMatches (full, queryX, sort, "AECIG"); assertMatches (full, queryY, sort, "BFJHD"); sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) ); assertMatches (full, queryX, sort, "CEGIA"); assertMatches (full, queryY, sort, "BFHJD"); } } // test sorting when the sort field is empty (undefined) for some of the documents public void testEmptyFieldSort() throws Exception { // NOTE: do not test DocValues fields here, since you // can't sort when some 
documents don't have the field sort.setSort (new SortField ("string", SortField.Type.STRING) ); assertMatches (full, queryF, sort, "ZJI"); sort.setSort (new SortField ("string", SortField.Type.STRING, true) ); assertMatches (full, queryF, sort, "IJZ"); sort.setSort (new SortField ("int", SortField.Type.INT) ); assertMatches (full, queryF, sort, "IZJ"); sort.setSort (new SortField ("int", SortField.Type.INT, true) ); assertMatches (full, queryF, sort, "JZI"); sort.setSort (new SortField ("float", SortField.Type.FLOAT) ); assertMatches (full, queryF, sort, "ZJI"); // using a nonexisting field as first sort key shouldn't make a difference: sort.setSort (new SortField ("nosuchfield", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT) ); assertMatches (full, queryF, sort, "ZJI"); sort.setSort (new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (full, queryF, sort, "IJZ"); // When a field is null for both documents, the next SortField should be used. sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT) ); assertMatches (full, queryG, sort, "ZWXY"); // Reverse the last criterium to make sure the test didn't pass by chance sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (full, queryG, sort, "ZYXW"); // Do the same for a ParallelMultiSearcher ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random(), 2, 8), new NamedThreadFactory("testEmptyFieldSort")); IndexSearcher parallelSearcher=new IndexSearcher (full.getIndexReader(), exec); sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT) ); assertMatches (parallelSearcher, queryG, sort, "ZWXY"); sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (parallelSearcher, queryG, sort, "ZYXW"); exec.shutdown(); exec.awaitTermination(1000, TimeUnit.MILLISECONDS); } // test sorts using a series of fields public void testSortCombos() throws Exception { sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("float", SortField.Type.FLOAT) ); assertMatches (full, queryX, sort, "IGEAC"); sort.setSort (new SortField ("int", SortField.Type.INT, true), new SortField (null, SortField.Type.DOC, true) ); assertMatches (full, queryX, sort, "CEAGI"); sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) ); assertMatches (full, queryX, sort, "GICEA"); if (supportsDocValues) { sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), useDocValues(new SortField ("float", SortField.Type.FLOAT))); assertMatches (full, queryX, sort, "IGEAC"); sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)), useDocValues(new SortField (null, SortField.Type.DOC, true))); assertMatches (full, queryX, sort, "CEAGI"); sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType()))); assertMatches (full, queryX, sort, "GICEA"); } } // test a variety of sorts using a parallel multisearcher public void testParallelMultiSort() throws Exception { ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random(), 2, 
8), new NamedThreadFactory("testParallelMultiSort")); IndexSearcher searcher = new IndexSearcher( new MultiReader(searchX.getIndexReader(), searchY.getIndexReader()), exec); runMultiSorts(searcher, false); exec.shutdown(); exec.awaitTermination(1000, TimeUnit.MILLISECONDS); } public void testTopDocsScores() throws Exception { // There was previously a bug in FieldSortedHitQueue.maxscore when only a single // doc was added. That is what the following tests for. Sort sort = new Sort(); int nDocs=10; // try to pick a query that will result in an unnormalized // score greater than 1 to test for correct normalization final TopDocs docs1 = full.search(queryE,null,nDocs,sort,true,true); // a filter that only allows through the first hit Filter filt = new Filter() { @Override public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { assertNull("acceptDocs should be null, as we have no deletions", acceptDocs); BitSet bs = new BitSet(context.reader().maxDoc()); bs.set(0, context.reader().maxDoc()); bs.set(docs1.scoreDocs[0].doc); return new DocIdBitSet(bs); } }; TopDocs docs2 = full.search(queryE, filt, nDocs, sort,true,true); assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6); } public void testSortWithoutFillFields() throws Exception { // There was previously a bug in TopFieldCollector when fillFields was set // to false - the same doc and score was set in ScoreDoc[] array. This test // asserts that if fillFields is false, the documents are set properly. It // does not use Searcher's default search methods (with Sort) since all set // fillFields to true. Sort[] sort = new Sort[] { new Sort(SortField.FIELD_DOC), new Sort() }; for (int i = 0; i < sort.length; i++) { Query q = new MatchAllDocsQuery(); TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, false, false, false, true); full.search(q, tdc); ScoreDoc[] sd = tdc.topDocs().scoreDocs; for (int j = 1; j < sd.length; j++) { assertTrue(sd[j].doc != sd[j - 1].doc); } } } public void testSortWithoutScoreTracking() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; for (int i = 0; i < sort.length; i++) { Query q = new MatchAllDocsQuery(); TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, false, false, true); full.search(q, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; for (int j = 0; j < sd.length; j++) { assertTrue(Float.isNaN(sd[j].score)); } assertTrue(Float.isNaN(td.getMaxScore())); } } public void testSortWithScoreNoMaxScoreTracking() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; for (int i = 0; i < sort.length; i++) { Query q = new MatchAllDocsQuery(); TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, false, true); full.search(q, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; for (int j = 0; j < sd.length; j++) { assertTrue(!Float.isNaN(sd[j].score)); } assertTrue(Float.isNaN(td.getMaxScore())); } } // MultiComparatorScoringNoMaxScoreCollector public void testSortWithScoreNoMaxScoreTrackingMulti() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. 
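    // The boolean arguments passed to TopFieldCollector.create below are, in order:
    // fillFields, trackDocScores, trackMaxScore and docsScoredInOrder. Scores are tracked
    // per hit but the max score is not, so td.getMaxScore() is expected to come back NaN.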
Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE) }; for (int i = 0; i < sort.length; i++) { Query q = new MatchAllDocsQuery(); TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, false, true); full.search(q, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; for (int j = 0; j < sd.length; j++) { assertTrue(!Float.isNaN(sd[j].score)); } assertTrue(Float.isNaN(td.getMaxScore())); } } public void testSortWithScoreAndMaxScoreTracking() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; for (int i = 0; i < sort.length; i++) { Query q = new MatchAllDocsQuery(); TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, true, true); full.search(q, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; for (int j = 0; j < sd.length; j++) { assertTrue(!Float.isNaN(sd[j].score)); } assertTrue(!Float.isNaN(td.getMaxScore())); } } public void testOutOfOrderDocsScoringSort() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; boolean[][] tfcOptions = new boolean[][] { new boolean[] { false, false, false }, new boolean[] { false, false, true }, new boolean[] { false, true, false }, new boolean[] { false, true, true }, new boolean[] { true, false, false }, new boolean[] { true, false, true }, new boolean[] { true, true, false }, new boolean[] { true, true, true }, }; String[] actualTFCClasses = new String[] { "OutOfOrderOneComparatorNonScoringCollector", "OutOfOrderOneComparatorScoringMaxScoreCollector", "OutOfOrderOneComparatorScoringNoMaxScoreCollector", "OutOfOrderOneComparatorScoringMaxScoreCollector", "OutOfOrderOneComparatorNonScoringCollector", "OutOfOrderOneComparatorScoringMaxScoreCollector", "OutOfOrderOneComparatorScoringNoMaxScoreCollector", "OutOfOrderOneComparatorScoringMaxScoreCollector" }; BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. bq.setMinimumNumberShouldMatch(1); for (int i = 0; i < sort.length; i++) { for (int j = 0; j < tfcOptions.length; j++) { TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, tfcOptions[j][0], tfcOptions[j][1], tfcOptions[j][2], false); assertTrue(tdc.getClass().getName().endsWith("$"+actualTFCClasses[j])); full.search(bq, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; assertEquals(10, sd.length); } } } // OutOfOrderMulti*Collector public void testOutOfOrderDocsScoringSortMulti() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. 
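    // Each tfcOptions row below is {fillFields, trackDocScores, trackMaxScore}; the entry at the
    // same position in actualTFCClasses names the nested TopFieldCollector implementation that
    // create(...) is expected to return when out-of-order scoring is allowed (last argument false).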
Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE) }; boolean[][] tfcOptions = new boolean[][] { new boolean[] { false, false, false }, new boolean[] { false, false, true }, new boolean[] { false, true, false }, new boolean[] { false, true, true }, new boolean[] { true, false, false }, new boolean[] { true, false, true }, new boolean[] { true, true, false }, new boolean[] { true, true, true }, }; String[] actualTFCClasses = new String[] { "OutOfOrderMultiComparatorNonScoringCollector", "OutOfOrderMultiComparatorScoringMaxScoreCollector", "OutOfOrderMultiComparatorScoringNoMaxScoreCollector", "OutOfOrderMultiComparatorScoringMaxScoreCollector", "OutOfOrderMultiComparatorNonScoringCollector", "OutOfOrderMultiComparatorScoringMaxScoreCollector", "OutOfOrderMultiComparatorScoringNoMaxScoreCollector", "OutOfOrderMultiComparatorScoringMaxScoreCollector" }; BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. bq.setMinimumNumberShouldMatch(1); for (int i = 0; i < sort.length; i++) { for (int j = 0; j < tfcOptions.length; j++) { TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, tfcOptions[j][0], tfcOptions[j][1], tfcOptions[j][2], false); assertTrue(tdc.getClass().getName().endsWith("$"+actualTFCClasses[j])); full.search(bq, tdc); TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; assertEquals(10, sd.length); } } } public void testSortWithScoreAndMaxScoreTrackingNoResults() throws Exception { // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; for (int i = 0; i < sort.length; i++) { TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, true, true); TopDocs td = tdc.topDocs(); assertEquals(0, td.totalHits); assertTrue(Float.isNaN(td.getMaxScore())); } } // runs a variety of sorts useful for multisearchers private void runMultiSorts(IndexSearcher multi, boolean isFull) throws Exception { sort.setSort(SortField.FIELD_DOC); String expected = isFull ? "ABCDEFGHIJ" : "ACEGIBDFHJ"; assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField ("int", SortField.Type.INT)); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField ("int", SortField.Type.INT), SortField.FIELD_DOC); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField("int", SortField.Type.INT)); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField ("float", SortField.Type.FLOAT), SortField.FIELD_DOC); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("float", SortField.Type.FLOAT)); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("string", SortField.Type.STRING)); assertMatches(multi, queryA, sort, "DJAIHGFEBC"); sort.setSort(new SortField("int", SortField.Type.INT, true)); expected = isFull ? 
"CABEJGFHDI" : "CAEBJGFHDI"; assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField("float", SortField.Type.FLOAT, true)); assertMatches(multi, queryA, sort, "BAFECIJHDG"); sort.setSort(new SortField("string", SortField.Type.STRING, true)); assertMatches(multi, queryA, sort, "CBEFGHIAJD"); sort.setSort(new SortField("int", SortField.Type.INT),new SortField("float", SortField.Type.FLOAT)); assertMatches(multi, queryA, sort, "IDHFGJEABC"); sort.setSort(new SortField("float", SortField.Type.FLOAT),new SortField("string", SortField.Type.STRING)); assertMatches(multi, queryA, sort, "GDHJICEFAB"); sort.setSort(new SortField ("int", SortField.Type.INT)); assertMatches(multi, queryF, sort, "IZJ"); sort.setSort(new SortField ("int", SortField.Type.INT, true)); assertMatches(multi, queryF, sort, "JZI"); sort.setSort(new SortField ("float", SortField.Type.FLOAT)); assertMatches(multi, queryF, sort, "ZJI"); sort.setSort(new SortField ("string", SortField.Type.STRING)); assertMatches(multi, queryF, sort, "ZJI"); sort.setSort(new SortField ("string", SortField.Type.STRING, true)); assertMatches(multi, queryF, sort, "IJZ"); if (supportsDocValues) { sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(useDocValues(new SortField("int", SortField.Type.INT))); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); sort.setSort(useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT))); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(useDocValues(new SortField("int", SortField.Type.INT, true))); expected = isFull ? 
"CABEJGFHDI" : "CAEBJGFHDI"; assertMatches(multi, queryA, sort, expected); sort.setSort(useDocValues(new SortField("int", SortField.Type.INT)), useDocValues(new SortField("float", SortField.Type.FLOAT))); assertMatches(multi, queryA, sort, "IDHFGJEABC"); sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); assertMatches(multi, queryF, sort, "IZJ"); sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true))); assertMatches(multi, queryF, sort, "JZI"); sort.setSort(useDocValues(new SortField("string", getDVStringSortType()))); assertMatches(multi, queryA, sort, "DJAIHGFEBC"); sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true))); assertMatches(multi, queryA, sort, "CBEFGHIAJD"); sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType()))); assertMatches(multi, queryA, sort, "GDHJICEFAB"); sort.setSort(useDocValues(new SortField ("string", getDVStringSortType()))); assertMatches(multi, queryF, sort, "ZJI"); sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true))); assertMatches(multi, queryF, sort, "IJZ"); } // up to this point, all of the searches should have "sane" // FieldCache behavior, and should have reused hte cache in several cases assertSaneFieldCaches(getTestName() + " various"); // next we'll check Locale based (String[]) for 'string', so purge first FieldCache.DEFAULT.purgeAllCaches(); } private void assertMatches(IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException { assertMatches( null, searcher, query, sort, expectedResult ); } // make sure the documents returned by the search match the expected list private void assertMatches(String msg, IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException { //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs; TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort, true, true); ScoreDoc[] result = hits.scoreDocs; assertEquals(expectedResult.length(),hits.totalHits); StringBuilder buff = new StringBuilder(10); int n = result.length; for (int i=0; i<n; ++i) { Document doc = searcher.doc(result[i].doc); IndexableField[] v = doc.getFields("tracer"); for (int j=0; j<v.length; ++j) { buff.append (v[j].stringValue()); } } assertEquals(msg, expectedResult, buff.toString()); } public void testEmptyStringVsNullStringSort() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); Document doc = new Document(); doc.add(newStringField("f", "", Field.Store.NO)); doc.add(newStringField("t", "1", Field.Store.NO)); w.addDocument(doc); w.commit(); doc = new Document(); doc.add(newStringField("t", "1", Field.Store.NO)); w.addDocument(doc); IndexReader r = DirectoryReader.open(w, true); w.close(); IndexSearcher s = newSearcher(r); TopDocs hits = s.search(new TermQuery(new Term("t", "1")), null, 10, new Sort(new SortField("f", SortField.Type.STRING))); assertEquals(2, hits.totalHits); // null sorts first assertEquals(1, hits.scoreDocs[0].doc); assertEquals(0, hits.scoreDocs[1].doc); r.close(); dir.close(); } public void testLUCENE2142() throws IOException { Directory indexStore = newDirectory(); IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); for (int i=0; i<5; i++) { Document doc = new Document(); doc.add 
(new StringField ("string", "a"+i, Field.Store.NO)); doc.add (new StringField ("string", "b"+i, Field.Store.NO)); writer.addDocument (doc); } writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases writer.close(); sort.setSort( new SortField("string", SortField.Type.STRING), SortField.FIELD_DOC ); // this should not throw AIOOBE or RuntimeEx IndexReader reader = DirectoryReader.open(indexStore); IndexSearcher searcher = new IndexSearcher(reader); searcher.search(new MatchAllDocsQuery(), null, 500, sort); reader.close(); indexStore.close(); } public void testCountingCollector() throws Exception { Directory indexStore = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore); for (int i=0; i<5; i++) { Document doc = new Document(); doc.add (new StringField ("string", "a"+i, Field.Store.NO)); doc.add (new StringField ("string", "b"+i, Field.Store.NO)); writer.addDocument (doc); } IndexReader reader = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(reader); TotalHitCountCollector c = new TotalHitCountCollector(); searcher.search(new MatchAllDocsQuery(), null, c); assertEquals(5, c.getTotalHits()); reader.close(); indexStore.close(); } private static class RandomFilter extends Filter { private final Random random; private float density; private final List<BytesRef> docValues; public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>()); // density should be 0.0 ... 1.0 public RandomFilter(Random random, float density, List<BytesRef> docValues) { this.random = random; this.density = density; this.docValues = docValues; } @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final int maxDoc = context.reader().maxDoc(); final DocValues.Source idSource = context.reader().docValues("id").getSource(); assertNotNull(idSource); final FixedBitSet bits = new FixedBitSet(maxDoc); for(int docID=0;docID<maxDoc;docID++) { if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) { bits.set(docID); //System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID); matchValues.add(docValues.get((int) idSource.getInt(docID))); } } return bits; } } public void testRandomStringSort() throws Exception { Random random = new Random(random().nextLong()); assumeTrue("cannot work with Lucene3x codec", defaultCodecSupportsDocValues()); final int NUM_DOCS = atLeast(100); final Directory dir = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random, dir); final boolean allowDups = random.nextBoolean(); final Set<String> seen = new HashSet<String>(); final int maxLength = _TestUtil.nextInt(random, 5, 100); if (VERBOSE) { System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups); } int numDocs = 0; final List<BytesRef> docValues = new ArrayList<BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { final String s; if (random.nextBoolean()) { s = _TestUtil.randomSimpleString(random, maxLength); } else { s = _TestUtil.randomUnicodeString(random, maxLength); } final BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.contains(s)) { continue; } seen.add(s); } if (VERBOSE) { System.out.println(" " + numDocs + ": s=" + s); } final Document doc = new Document(); doc.add(new SortedBytesDocValuesField("stringdv", br)); doc.add(newStringField("string", s, Field.Store.NO)); doc.add(new PackedLongDocValuesField("id", numDocs)); 
docValues.add(br); writer.addDocument(doc); numDocs++; if (random.nextInt(40) == 17) { // force flush writer.getReader().close(); } } final IndexReader r = writer.getReader(); writer.close(); if (VERBOSE) { System.out.println(" reader=" + r); } final IndexSearcher s = newSearcher(r, false); final int ITERS = atLeast(100); for(int iter=0;iter<ITERS;iter++) { final boolean reverse = random.nextBoolean(); final TopFieldDocs hits; final SortField sf; if (random.nextBoolean()) { sf = new SortField("stringdv", SortField.Type.STRING, reverse); sf.setUseIndexValues(true); } else { sf = new SortField("string", SortField.Type.STRING, reverse); } final Sort sort = new Sort(sf); final int hitCount = _TestUtil.nextInt(random, 1, r.maxDoc() + 20); final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues); if (random.nextBoolean()) { hits = s.search(new ConstantScoreQuery(f), hitCount, sort); } else { hits = s.search(new MatchAllDocsQuery(), f, hitCount, sort); } if (VERBOSE) { System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse); } // Compute expected results: Collections.sort(f.matchValues); if (reverse) { Collections.reverse(f.matchValues); } final List<BytesRef> expected = f.matchValues; if (VERBOSE) { System.out.println(" expected:"); for(int idx=0;idx<expected.size();idx++) { System.out.println(" " + idx + ": " + expected.get(idx).utf8ToString()); if (idx == hitCount-1) { break; } } } if (VERBOSE) { System.out.println(" actual:"); for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) { final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX]; System.out.println(" " + hitIDX + ": " + ((BytesRef) fd.fields[0]).utf8ToString()); } } for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) { final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX]; assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]); } } r.close(); dir.close(); } public void testMaxScore() throws Exception { Directory d = newDirectory(); // Not RIW because we need exactly 2 segs: IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); int id = 0; for(int seg=0;seg<2;seg++) { for(int docIDX=0;docIDX<10;docIDX++) { Document doc = new Document(); doc.add(newStringField("id", ""+docIDX, Field.Store.YES)); StringBuilder sb = new StringBuilder(); for(int i=0;i<id;i++) { sb.append(' '); sb.append("text"); } doc.add(newTextField("body", sb.toString(), Field.Store.NO)); w.addDocument(doc); id++; } w.commit(); } IndexReader r = DirectoryReader.open(w, true); w.close(); Query q = new TermQuery(new Term("body", "text")); IndexSearcher s = newSearcher(r); float maxScore = s.search(q , 10).getMaxScore(); assertEquals(maxScore, s.search(q, null, 3, Sort.INDEXORDER, random().nextBoolean(), true).getMaxScore(), 0.0); assertEquals(maxScore, s.search(q, null, 3, Sort.RELEVANCE, random().nextBoolean(), true).getMaxScore(), 0.0); assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, false)}), random().nextBoolean(), true).getMaxScore(), 0.0); assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, true)}), random().nextBoolean(), true).getMaxScore(), 0.0); r.close(); d.close(); } }