/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tokenizer that emits the same single term a random number of times per document.
 *
 * <p>On each {@link #reset()} it decides (with probability {@code percentDocs}) whether the
 * current document contains the term at all; if so, it emits the term between 1 and
 * {@code maxTF} times, otherwise zero times. Used to synthesize postings with a controlled
 * document frequency and term frequency distribution.
 */
class RepeatingTokenizer extends Tokenizer {

  private final Random random;
  /** Probability in [0,1] that a given document contains the term at least once. */
  private final float percentDocs;
  /** Upper bound (inclusive) on the term frequency within a single document. */
  private final int maxTF;
  /** Remaining number of tokens to emit for the current document; set in reset(). */
  private int num;
  private final CharTermAttribute termAtt;
  /** The single term text repeated for every token. */
  private final String value;

  public RepeatingTokenizer(String val, Random random, float percentDocs, int maxTF) {
    super();
    this.value = val;
    this.random = random;
    this.percentDocs = percentDocs;
    this.maxTF = maxTF;
    this.termAtt = addAttribute(CharTermAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    num--;
    if (num >= 0) {
      clearAttributes();
      termAtt.append(value);
      return true;
    }
    return false;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    // Decide this document's term frequency: 0 with probability (1 - percentDocs),
    // otherwise uniform in [1, maxTF].
    if (random.nextFloat() < percentDocs) {
      num = random.nextInt(maxTF) + 1;
    } else {
      num = 0;
    }
  }
}

/**
 * Disabled-by-default performance test measuring the cost of iterating a term's postings
 * ({@code PostingsEnum}) over an index whose postings are synthesized by
 * {@link RepeatingTokenizer}.
 */
public class TestTermdocPerf extends LuceneTestCase {

  /**
   * Builds a fresh single-segment index of {@code ndocs} documents over {@code dir}.
   *
   * <p>Each document nominally stores {@code val} in {@code field}, but the actual token
   * stream is produced by {@link RepeatingTokenizer}, so only roughly
   * {@code percentDocs * ndocs} documents actually contain the term, each with a term
   * frequency in [1, maxTF].
   */
  void addDocs(final Random random, Directory dir, final int ndocs, String field,
               final String val, final int maxTF, final float percentDocs)
      throws IOException {

    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new RepeatingTokenizer(val, random, percentDocs, maxTF));
      }
    };

    Document doc = new Document();
    doc.add(newStringField(field, val, Field.Store.NO));

    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(analyzer)
            .setOpenMode(OpenMode.CREATE)
            .setMaxBufferedDocs(100)
            .setMergePolicy(newLogMergePolicy(100)));
    for (int i = 0; i < ndocs; i++) {
      writer.addDocument(doc);
    }
    // Single segment so the postings iteration below measures one enum, not a merge view.
    writer.forceMerge(1);
    writer.close();
  }

  /**
   * Creates an index and times {@code iter} full iterations over the postings of term
   * "foo:val", printing timings when {@code VERBOSE}.
   *
   * @return the sum of all visited docIDs (prevents the loop from being optimized away)
   */
  public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
    Directory dir = newDirectory();

    long start = System.currentTimeMillis();
    addDocs(random(), dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = System.currentTimeMillis();
    if (VERBOSE) {
      System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end - start));
    }

    IndexReader reader = DirectoryReader.open(dir);
    TermsEnum tenum = MultiFields.getTerms(reader, "foo").iterator();

    start = System.currentTimeMillis();

    int ret = 0;
    PostingsEnum tdocs = null;
    final Random random = new Random(random().nextLong());
    for (int i = 0; i < iter; i++) {
      tenum.seekCeil(new BytesRef("val"));
      tdocs = TestUtil.docs(random, tenum, tdocs, PostingsEnum.NONE);
      while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ret += tdocs.docID();
      }
    }

    end = System.currentTimeMillis();
    if (VERBOSE) {
      System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
    }

    // FIX: the original leaked both the reader and the directory; LuceneTestCase's
    // newDirectory() tracks unclosed resources, so release them before returning.
    reader.close();
    dir.close();

    return ret;
  }

  public void testTermDocPerf() throws IOException {
    // performance test for 10% of documents containing a term
    // doTest(100000, 10000,3,.1f);
  }
}