/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tokenizer that emits the same single term a random number of times per document.
 *
 * <p>On each {@link #reset()} it decides (with probability {@code percentDocs}) whether the
 * current document contains the term at all; if so, it emits the term between 1 and
 * {@code maxTF} times, otherwise zero times. Used to synthesize postings with a controlled
 * document frequency and term frequency distribution.
 */
class RepeatingTokenizer extends Tokenizer {

  private final Random random;
  /** Probability in [0,1] that a given document contains the term at least once. */
  private final float percentDocs;
  /** Upper bound (inclusive) on the term frequency within a single document. */
  private final int maxTF;
  /** Remaining number of tokens to emit for the current document; set in reset(). */
  private int num;
  private final CharTermAttribute termAtt;
  /** The single term text repeated for every token. */
  private final String value;

  public RepeatingTokenizer(String val, Random random, float percentDocs, int maxTF) {
    super();
    this.value = val;
    this.random = random;
    this.percentDocs = percentDocs;
    this.maxTF = maxTF;
    this.termAtt = addAttribute(CharTermAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    num--;
    if (num >= 0) {
      clearAttributes();
      termAtt.append(value);
      return true;
    }
    return false;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    // Decide this document's term frequency: 0 with probability (1 - percentDocs),
    // otherwise uniform in [1, maxTF].
    if (random.nextFloat() < percentDocs) {
      num = random.nextInt(maxTF) + 1;
    } else {
      num = 0;
    }
  }
}

/**
 * Disabled-by-default performance test measuring the cost of iterating a term's postings
 * ({@code PostingsEnum}) over an index whose postings are synthesized by
 * {@link RepeatingTokenizer}.
 */
public class TestTermdocPerf extends LuceneTestCase {

  /**
   * Builds a fresh single-segment index of {@code ndocs} documents over {@code dir}.
   *
   * <p>Each document nominally stores {@code val} in {@code field}, but the actual token
   * stream is produced by {@link RepeatingTokenizer}, so only roughly
   * {@code percentDocs * ndocs} documents actually contain the term, each with a term
   * frequency in [1, maxTF].
   */
  void addDocs(final Random random, Directory dir, final int ndocs, String field,
               final String val, final int maxTF, final float percentDocs)
      throws IOException {

    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new RepeatingTokenizer(val, random, percentDocs, maxTF));
      }
    };

    Document doc = new Document();
    doc.add(newStringField(field, val, Field.Store.NO));

    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(analyzer)
            .setOpenMode(OpenMode.CREATE)
            .setMaxBufferedDocs(100)
            .setMergePolicy(newLogMergePolicy(100)));
    for (int i = 0; i < ndocs; i++) {
      writer.addDocument(doc);
    }
    // Single segment so the postings iteration below measures one enum, not a merge view.
    writer.forceMerge(1);
    writer.close();
  }

  /**
   * Creates an index and times {@code iter} full iterations over the postings of term
   * "foo:val", printing timings when {@code VERBOSE}.
   *
   * @return the sum of all visited docIDs (prevents the loop from being optimized away)
   */
  public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
    Directory dir = newDirectory();

    long start = System.currentTimeMillis();
    addDocs(random(), dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = System.currentTimeMillis();
    if (VERBOSE) {
      System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end - start));
    }

    IndexReader reader = DirectoryReader.open(dir);
    TermsEnum tenum = MultiFields.getTerms(reader, "foo").iterator();

    start = System.currentTimeMillis();

    int ret = 0;
    PostingsEnum tdocs = null;
    final Random random = new Random(random().nextLong());
    for (int i = 0; i < iter; i++) {
      tenum.seekCeil(new BytesRef("val"));
      tdocs = TestUtil.docs(random, tenum, tdocs, PostingsEnum.NONE);
      while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ret += tdocs.docID();
      }
    }

    end = System.currentTimeMillis();
    if (VERBOSE) {
      System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
    }

    // FIX: the original leaked both the reader and the directory; LuceneTestCase's
    // newDirectory() tracks unclosed resources, so release them before returning.
    reader.close();
    dir.close();

    return ret;
  }

  public void testTermDocPerf() throws IOException {
    // performance test for 10% of documents containing a term
    // doTest(100000, 10000,3,.1f);
  }
}