package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

/** Tests {@link TermInfosReaderIndex} against a freshly built single-segment index. */
public class TestTermInfosReaderIndex extends LuceneTestCase {

  private static final int NUMBER_OF_DOCUMENTS = 1000;
  private static final int NUMBER_OF_FIELDS = 100;

  private TermInfosReaderIndex index;
  private Directory directory;
  private SegmentTermEnum termEnum;
  private int indexDivisor;
  private int termIndexInterval;
  private int readBufferSize = 1024;
  private IndexReader reader;
  private List<Term> sampleTerms;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    indexDivisor = _TestUtil.nextInt(random, 1, 10);
    directory = newDirectory();
    termIndexInterval = populate(directory);

    SegmentReader r = SegmentReader.getOnlySegmentReader(directory);
    String segment = r.getSegmentName();
    r.close();

    FieldInfos fieldInfos = new FieldInfos(directory,
        IndexFileNames.segmentFileName(segment, IndexFileNames.FIELD_INFOS_EXTENSION));

    // Open the segment's terms index (.tii) and terms dictionary (.tis) files directly.
    String segmentFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
    long tiiFileLength = directory.fileLength(segmentFileName);
    IndexInput input = directory.openInput(segmentFileName, readBufferSize);
    termEnum = new SegmentTermEnum(
        directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION), readBufferSize),
        fieldInfos, false);
    int totalIndexInterval = termEnum.indexInterval * indexDivisor;

    SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
    index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval);
    indexEnum.close();
    input.close();

    reader = IndexReader.open(directory);
    sampleTerms = sample(reader, 1000);
  }

  @Override
  public void tearDown() throws Exception {
    termEnum.close();
    reader.close();
    directory.close();
    super.tearDown();
  }

  public void testSeekEnum() throws CorruptIndexException, IOException {
    int indexPosition = 3;
    SegmentTermEnum clone = (SegmentTermEnum) termEnum.clone();
    Term term = findTermThatWouldBeAtIndex(clone, indexPosition);
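    // Seeking the enum to index entry `indexPosition` should land exactly on
    // the term found above by scanning linearly.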
    index.seekEnum(clone, indexPosition);
    assertEquals(term, clone.term());
    clone.close();
  }

  public void testCompareTo() throws IOException {
    Term term = new Term("field" + random.nextInt(NUMBER_OF_FIELDS), getText());
    BytesRef termBytesRef = new BytesRef(term.text());
    // The index's comparison must agree with Term's natural ordering for every index entry.
    for (int i = 0; i < index.length(); i++) {
      Term t = index.getTerm(i);
      int compareTo = term.compareTo(t);
      assertEquals(compareTo, index.compareTo(term, termBytesRef, i));
    }
  }

  public void testRandomSearchPerformance() throws CorruptIndexException, IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    for (Term t : sampleTerms) {
      // Every sampled term came from the index, so each query must match at least one document.
      TermQuery query = new TermQuery(t);
      TopDocs topDocs = searcher.search(query, 10);
      assertTrue(topDocs.totalHits > 0);
    }
    searcher.close();
  }

  /**
   * Collects up to <code>size</code> terms, randomly replacing entries once the list is full
   * (an approximate reservoir sample). Uses the test's seeded random so runs are reproducible.
   */
  private List<Term> sample(IndexReader reader, int size) throws IOException {
    List<Term> sample = new ArrayList<Term>();
    TermEnum terms = reader.terms();
    while (terms.next()) {
      if (sample.size() >= size) {
        int pos = random.nextInt(size);
        sample.set(pos, terms.term());
      } else {
        sample.add(terms.term());
      }
    }
    terms.close();
    Collections.shuffle(sample, random);
    return sample;
  }

  /** Walks the enum to the term that index entry <code>index</code> points at,
   *  i.e. every (termIndexInterval * indexDivisor)'th term. */
  private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException {
    int termPosition = index * termIndexInterval * indexDivisor;
    for (int i = 0; i < termPosition; i++) {
      if (!termEnum.next()) {
        fail("Should not have run out of terms.");
      }
    }
    return termEnum.term();
  }

  private int populate(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
    // Turn off the compound file format: this test opens some index files directly.
    LogMergePolicy mp = newLogMergePolicy();
    mp.setUseCompoundFile(false);
    config.setMergePolicy(mp);

    RandomIndexWriter writer = new RandomIndexWriter(random, directory, config);
    for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) {
      Document document = new Document();
      for (int f = 0; f < NUMBER_OF_FIELDS; f++) {
        document.add(newField("field" + f, getText(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
      }
      writer.addDocument(document);
    }
    // Merge down to a single segment so setUp can open that segment's files.
    writer.forceMerge(1);
    writer.close();
    return config.getTermIndexInterval();
  }

  /** Random base-36 keyword text. */
  private String getText() {
    return Long.toString(random.nextLong(), Character.MAX_RADIX);
  }
}