/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;

public class TestMultiFields extends LuceneTestCase {

  public void testRandom() throws Exception {
    // Index random terms into multiple segments (NoMergePolicy keeps each
    // flushed segment separate), tracking which doc IDs each term went into
    // and which docs were later deleted.
    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                             .setMergePolicy(NoMergePolicy.INSTANCE));
      // we can do this because we use NoMergePolicy (and don't merge to "nothing")
      w.setKeepFullyDeletedSegments(true);

      Map<BytesRef,List<Integer>> docs = new HashMap<>();
      Set<Integer> deleted = new HashSet<>();
      List<BytesRef> terms = new ArrayList<>();

      int numDocs = TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<>();
      for(int i=0;i<numDocs;i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue(""+i);
        w.addDocument(doc);

        if (random().nextInt(4) == 1) {
          w.commit();
        }

        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", ""+delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<>(uniqueTerms);
        Collections.sort(termsList);
        System.out.println("TEST: terms in UTF-8 order:");
        for(BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for(int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }
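
      // Verify: deleted docs must be invisible through the live-docs bits, and
      // each term's postings must enumerate exactly the doc IDs recorded for
      // it during indexing, in order.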
      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for(int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for(int i=0;i<100;i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println("TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        PostingsEnum postingsEnum = TestUtil.docs(random(), reader, "field", term, null, PostingsEnum.NONE);
        assertNotNull(postingsEnum);

        for(int docID : docs.get(term)) {
          assertEquals(docID, postingsEnum.nextDoc());
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
      }
      reader.close();
      dir.close();
    }
  }

  /*
  private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
    PostingsEnum postings = TestUtil.docs(random(), r, "field", new BytesRef(term), null, PostingsEnum.NONE);
    for(int docID : expected) {
      assertEquals(docID, postings.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  }
  */

  public void testSeparateEnums() throws Exception {
    // Two PostingsEnums obtained for the same term must iterate independently.
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    Document d = new Document();
    d.add(newStringField("f", "j", Field.Store.NO));
    w.addDocument(d);
    w.commit();
    w.addDocument(d);
    IndexReader r = w.getReader();
    w.close();
    PostingsEnum d1 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
    PostingsEnum d2 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
    assertEquals(0, d1.nextDoc());
    assertEquals(0, d2.nextDoc());
    r.close();
    dir.close();
  }

  public void testTermDocsEnum() throws Exception {
    // MultiFields.getTermDocsEnum must enumerate the term's postings across
    // both segments (the commit between the two adds creates two segments).
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    Document d = new Document();
    d.add(newStringField("f", "j", Field.Store.NO));
    w.addDocument(d);
    w.commit();
    w.addDocument(d);
    IndexReader r = w.getReader();
    w.close();
    PostingsEnum de = MultiFields.getTermDocsEnum(r, "f", new BytesRef("j"));
    assertEquals(0, de.nextDoc());
    assertEquals(1, de.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
    r.close();
    dir.close();
  }
}