package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.*;

/**
 * Exercises {@link TermsEnum} seeking, nexting and automaton intersection
 * against an index whose single field {@code "field"} holds randomly
 * generated unicode strings.
 *
 * <p>Every test cross-checks the enum against {@link #terms}, the in-memory
 * record of exactly what {@link #setUp()} indexed.
 */
public class TestTermsEnum2 extends LuceneTestCase {
  private Directory dir;
  private IndexReader reader;
  private IndexSearcher searcher;
  private SortedSet<BytesRef> terms; // the terms we put in the index
  private Automaton termsAutomaton;  // automata of the same
  int numIterations;                 // how many random rounds each test runs

  /**
   * Builds the index under test: at least 200 documents, each carrying one
   * random unicode string in {@code "field"}, and records every indexed value
   * in {@link #terms} and {@link #termsAutomaton}.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    numIterations = atLeast(50);
    dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));

    // A single Document/Field pair is reused; only the field value changes
    // between addDocument calls.
    Document doc = new Document();
    Field field = newStringField("field", "", Field.Store.YES);
    doc.add(field);
    terms = new TreeSet<>();

    int num = atLeast(200);
    for (int i = 0; i < num; i++) {
      String s = TestUtil.randomUnicodeString(random());
      field.setStringValue(s);
      terms.add(new BytesRef(s));
      writer.addDocument(doc);
    }

    termsAutomaton = BasicAutomata.makeStringUnion(terms);

    // Obtain the reader from the writer before closing the writer.
    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
  }

  /** Releases the reader and the directory opened by {@link #setUp()}. */
  @Override
  public void tearDown() throws Exception {
    reader.close();
    dir.close();
    super.tearDown();
  }

  /** tests a pre-intersected automaton against the original */
  public void testFiniteVersusInfinite() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      String reg = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();

      // Collect the indexed terms the random automaton accepts.
      final List<BytesRef> matchedTerms = new ArrayList<>();
      for (BytesRef t : terms) {
        if (BasicOperations.run(automaton, t.utf8ToString())) {
          matchedTerms.add(t);
        }
      }

      // "alternate" accepts exactly the matched terms, so querying with it
      // must produce the same hits as querying with the original automaton.
      Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms);
      //System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
      //AutomatonTestUtil.minimizeSimple(alternate);
      //System.out.println("minmize done");
      AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
      CheckHits.checkEqual(a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs);
    }
  }

  /** seeks to every term accepted by some automata */
  public void testSeeking() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      String reg = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
      TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null);

      // Visit the terms in random order so seeks go both forwards and backwards.
      List<BytesRef> unsortedTerms = new ArrayList<>(terms);
      Collections.shuffle(unsortedTerms, random());

      for (BytesRef term : unsortedTerms) {
        if (BasicOperations.run(automaton, term.utf8ToString())) {
          // term is accepted
          if (random().nextBoolean()) {
            // seek exact
            assertTrue(te.seekExact(term));
          } else {
            // seek ceil
            assertEquals(SeekStatus.FOUND, te.seekCeil(term));
            assertEquals(term, te.term());
          }
        }
      }
    }
  }

  /** mixes up seek and next for all terms */
  public void testSeekingAndNexting() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null);

      // "terms" is a sorted set, so it is walked in ascending order; whichever
      // of the three operations is picked, the enum must land on the current term.
      for (BytesRef term : terms) {
        int c = random().nextInt(3);
        if (c == 0) {
          assertEquals(term, te.next());
        } else if (c == 1) {
          assertEquals(SeekStatus.FOUND, te.seekCeil(term));
          assertEquals(term, te.term());
        } else {
          assertTrue(te.seekExact(term));
        }
      }
    }
  }

  /** tests intersect: TODO start at a random term! */
  public void testIntersect() throws Exception {
    for (int i = 0; i < numIterations; i++) {
      String reg = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
      CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false);
      TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null);

      // Expected language: indexed terms that the random automaton also accepts.
      Automaton expected = BasicOperations.intersection(termsAutomaton, automaton);

      // Drain the intersected enum; deep-copy each term because it is stored
      // beyond the next call to next().
      TreeSet<BytesRef> found = new TreeSet<>();
      while (te.next() != null) {
        found.add(BytesRef.deepCopyOf(te.term()));
      }

      Automaton actual = BasicAutomata.makeStringUnion(found);
      assertTrue(BasicOperations.sameLanguage(expected, actual));
    }
  }
}