package org.apache.lucene.codecs.lucene3x; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; public class TestTermInfosReaderIndex extends LuceneTestCase { private static int NUMBER_OF_DOCUMENTS; private static int NUMBER_OF_FIELDS; private static TermInfosReaderIndex index; private static Directory directory; private static SegmentTermEnum termEnum; private static int indexDivisor; private static int termIndexInterval; private static IndexReader reader; private static List<Term> sampleTerms; /** we will manually instantiate preflex-rw here */ @BeforeClass public static void beforeClass() throws Exception { LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true; IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); termIndexInterval = config.getTermIndexInterval(); indexDivisor = _TestUtil.nextInt(random(), 1, 10); NUMBER_OF_DOCUMENTS = atLeast(100); NUMBER_OF_FIELDS = atLeast(Math.max(10, 3*termIndexInterval*indexDivisor/NUMBER_OF_DOCUMENTS)); directory = newDirectory(); config.setCodec(new PreFlexRWCodec()); LogMergePolicy mp = newLogMergePolicy(); // turn off compound file, this test will open some index files directly. mp.setUseCompoundFile(false); config.setMergePolicy(mp); populate(directory, config); DirectoryReader r0 = IndexReader.open(directory); SegmentReader r = LuceneTestCase.getOnlySegmentReader(r0); String segment = r.getSegmentName(); r.close(); FieldInfosReader infosReader = new PreFlexRWCodec().fieldInfosFormat().getFieldInfosReader(); FieldInfos fieldInfos = infosReader.read(directory, segment, IOContext.READONCE); String segmentFileName = IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); long tiiFileLength = directory.fileLength(segmentFileName); IndexInput input = directory.openInput(segmentFileName, newIOContext(random())); termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), newIOContext(random())), fieldInfos, false); int totalIndexInterval = termEnum.indexInterval * indexDivisor; SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true); index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval); indexEnum.close(); input.close(); reader = IndexReader.open(directory); sampleTerms = sample(random(),reader,1000); } @AfterClass public static void afterClass() throws Exception { termEnum.close(); reader.close(); directory.close(); termEnum = null; reader = null; directory = null; index = null; sampleTerms = null; } public void testSeekEnum() throws CorruptIndexException, IOException { int indexPosition = 3; SegmentTermEnum clone = termEnum.clone(); Term term = findTermThatWouldBeAtIndex(clone, indexPosition); SegmentTermEnum enumerator = clone; index.seekEnum(enumerator, indexPosition); assertEquals(term, enumerator.term()); clone.close(); } public void testCompareTo() throws IOException { Term term = new Term("field" + random().nextInt(NUMBER_OF_FIELDS) ,getText()); for (int i = 0; i < index.length(); i++) { Term t = index.getTerm(i); int compareTo = term.compareTo(t); assertEquals(compareTo, index.compareTo(term, i)); } } public void testRandomSearchPerformance() throws CorruptIndexException, IOException { IndexSearcher searcher = new IndexSearcher(reader); for (Term t : sampleTerms) { TermQuery query = new TermQuery(t); TopDocs topDocs = searcher.search(query, 10); assertTrue(topDocs.totalHits > 0); } } private static List<Term> sample(Random random, IndexReader reader, int size) throws IOException { List<Term> sample = new ArrayList<Term>(); Fields fields = MultiFields.getFields(reader); for (String field : fields) { Terms terms = fields.terms(field); assertNotNull(terms); TermsEnum termsEnum = terms.iterator(null); while (termsEnum.next() != null) { if (sample.size() >= size) { int pos = random.nextInt(size); sample.set(pos, new Term(field, termsEnum.term())); } else { sample.add(new Term(field, termsEnum.term())); } } } Collections.shuffle(sample); return sample; } private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException { int termPosition = index * termIndexInterval * indexDivisor; for (int i = 0; i < termPosition; i++) { // TODO: this test just uses random terms, so this is always possible assumeTrue("ran out of terms", termEnum.next()); } final Term term = termEnum.term(); // An indexed term is only written when the term after // it exists, so, if the number of terms is 0 mod // termIndexInterval, the last index term will not be // written; so we require a term after this term // as well: assumeTrue("ran out of terms", termEnum.next()); return term; } private static void populate(Directory directory, IndexWriterConfig config) throws CorruptIndexException, LockObtainFailedException, IOException { RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config); for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) { Document document = new Document(); for (int f = 0; f < NUMBER_OF_FIELDS; f++) { document.add(newStringField("field" + f, getText(), Field.Store.NO)); } writer.addDocument(document); } writer.forceMerge(1); writer.close(); } private static String getText() { return Long.toString(random().nextLong(),Character.MAX_RADIX); } }