TestLucene40PostingsReader.java example

Explorer
heliosearch-master
- lucene
- solr
package org.apache.lucene.codecs.lucene40;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Collections;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;

public class TestLucene40PostingsReader extends LuceneTestCase {
  static final String terms[] = new String[100];
  static {
    for (int i = 0; i < terms.length; i++) {
      terms[i] = Integer.toString(i+1);
    }
  }
  
  @BeforeClass
  public static void beforeClass() {
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
  }

  /** tests terms with different probabilities of being in the document.
   *  depends heavily on term vectors cross-check at checkIndex
   */
  public void testPostings() throws Exception {
    Directory dir = newFSDirectory(TestUtil.getTempDir("postings"));
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setCodec(Codec.forName("Lucene40"));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    
    Document doc = new Document();
    
    // id field
    FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);
    idType.setStoreTermVectors(true);
    Field idField = new Field("id", "", idType);
    doc.add(idField);
    
    // title field: short text field
    FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);
    titleType.setStoreTermVectors(true);
    titleType.setStoreTermVectorPositions(true);
    titleType.setStoreTermVectorOffsets(true);
    titleType.setIndexOptions(indexOptions());
    Field titleField = new Field("title", "", titleType);
    doc.add(titleField);
    
    // body field: long text field
    FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);
    bodyType.setStoreTermVectors(true);
    bodyType.setStoreTermVectorPositions(true);
    bodyType.setStoreTermVectorOffsets(true);
    bodyType.setIndexOptions(indexOptions());
    Field bodyField = new Field("body", "", bodyType);
    doc.add(bodyField);
    
    int numDocs = atLeast(1000);
    for (int i = 0; i < numDocs; i++) {
      idField.setStringValue(Integer.toString(i));
      titleField.setStringValue(fieldValue(1));
      bodyField.setStringValue(fieldValue(3));
      iw.addDocument(doc);
      if (random().nextInt(20) == 0) {
        iw.deleteDocuments(new Term("id", Integer.toString(i)));
      }
    }
    if (random().nextBoolean()) {
      // delete 1-100% of docs
      iw.deleteDocuments(new Term("title", terms[random().nextInt(terms.length)]));
    }
    iw.close();
    dir.close(); // checkindex
  }
  
  IndexOptions indexOptions() {
    switch(random().nextInt(4)) {
      case 0: return IndexOptions.DOCS_ONLY;
      case 1: return IndexOptions.DOCS_AND_FREQS;
      case 2: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
      default: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    }
  }
  
  String fieldValue(int maxTF) {
    ArrayList<String> shuffled = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    int i = random().nextInt(terms.length);
    while (i < terms.length) {
      int tf =  TestUtil.nextInt(random(), 1, maxTF);
      for (int j = 0; j < tf; j++) {
        shuffled.add(terms[i]);
      }
      i++;
    }
    Collections.shuffle(shuffled, random());
    for (String term : shuffled) {
      sb.append(term);
      sb.append(' ');
    }
    return sb.toString();
  }
}