/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.automaton;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tests {@link CompiledAutomaton}: the result of {@code floor} against known term sets, and the
 * {@code type} reported for match-all and single-string automata.
 */
public class TestCompiledAutomaton extends LuceneTestCase {

  /**
   * Converts the given strings to {@link BytesRef}s, sorts them, builds an automaton from them with
   * {@link DaciukMihovAutomatonBuilder} and wraps it in a {@link CompiledAutomaton}.
   *
   * @param maxDeterminizedStates limit forwarded to the {@link CompiledAutomaton} constructor
   * @param strings the terms to build the automaton from
   * @return the compiled automaton
   */
  private CompiledAutomaton build(int maxDeterminizedStates, String... strings) {
    final List<BytesRef> terms = new ArrayList<>();
    for (String s : strings) {
      terms.add(new BytesRef(s));
    }
    // The terms are sorted before being handed to the builder.
    Collections.sort(terms);
    final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
    return new CompiledAutomaton(a, true, false, maxDeterminizedStates, false);
  }

  /**
   * Asserts that {@code c.floor(input)} yields {@code expected}.
   *
   * @param c the compiled automaton under test
   * @param input the term to look up
   * @param expected the expected floor term, or {@code null} if {@code floor} must return
   *     {@code null}
   */
  private void testFloor(CompiledAutomaton c, String input, String expected) {
    final BytesRef b = new BytesRef(input);
    final BytesRef result = c.floor(b, new BytesRefBuilder());
    if (expected == null) {
      assertNull(result);
    } else {
      assertNotNull(result);
      // JUnit argument order is (message, expected, actual); keeping it that way makes the
      // failure report label the two values correctly.
      assertEquals(
          "actual=" + result.utf8ToString() + " vs expected=" + expected + " (input=" + input + ")",
          new BytesRef(expected),
          result);
    }
  }

  /**
   * Builds a compiled automaton from {@code terms} and repeatedly checks {@code floor} against an
   * expected value derived by binary search over the sorted terms.
   *
   * @param maxDeterminizedStates limit forwarded to {@link #build(int, String...)}
   * @param terms the terms to build the automaton from
   */
  private void testTerms(int maxDeterminizedStates, String[] terms) throws Exception {
    final CompiledAutomaton c = build(maxDeterminizedStates, terms);

    // Sorted copy of the terms, used as the reference for the expected floor.
    final BytesRef[] termBytes = new BytesRef[terms.length];
    for (int idx = 0; idx < terms.length; idx++) {
      termBytes[idx] = new BytesRef(terms[idx]);
    }
    Arrays.sort(termBytes);

    if (VERBOSE) {
      System.out.println("\nTEST: terms in unicode order");
      for (BytesRef t : termBytes) {
        System.out.println("  " + t.utf8ToString());
      }
    }

    for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++) {
      // Roughly one time in ten probe with a term that is known to exist; otherwise use a
      // random string.
      final String s =
          random().nextInt(10) == 1 ? terms[random().nextInt(terms.length)] : randomString();
      if (VERBOSE) {
        System.out.println("\nTEST: floor(" + s + ")");
      }

      int loc = Arrays.binarySearch(termBytes, new BytesRef(s));
      final String expected;
      if (loc >= 0) {
        // Exact match: the floor is the term itself.
        expected = s;
      } else {
        // Term doesn't exist: convert the negative result into the insertion point.
        loc = -(loc + 1);
        if (loc == 0) {
          // Everything sorts after s, so there is no floor.
          expected = null;
        } else {
          // The entry just before the insertion point is the largest term below s.
          expected = termBytes[loc - 1].utf8ToString();
        }
      }
      if (VERBOSE) {
        System.out.println("  expected=" + expected);
      }
      testFloor(c, s, expected);
    }
  }

  /** Runs {@link #testTerms(int, String[])} over a set of distinct random strings. */
  public void testRandom() throws Exception {
    final int numTerms = atLeast(400);
    final Set<String> terms = new HashSet<>();
    // The set drops duplicates, so keep generating until numTerms distinct strings exist.
    while (terms.size() != numTerms) {
      terms.add(randomString());
    }
    testTerms(numTerms * 100, terms.toArray(new String[terms.size()]));
  }

  /** Returns a random string from {@link TestUtil#randomRealisticUnicodeString}. */
  private String randomString() {
    return TestUtil.randomRealisticUnicodeString(random());
  }

  /** Checks {@code floor} on a fixed three-term automaton ("fob", "foo", "goo"). */
  public void testBasic() throws Exception {
    CompiledAutomaton c = build(Operations.DEFAULT_MAX_DETERMINIZED_STATES, "fob", "foo", "goo");
    testFloor(c, "goo", "goo");
    testFloor(c, "ga", "foo");
    testFloor(c, "g", "foo");
    testFloor(c, "foc", "fob");
    testFloor(c, "foz", "foo");
    testFloor(c, "f", null);
    testFloor(c, "", null);
    testFloor(c, "aa", null);
    testFloor(c, "zzz", "goo");
  }

  // LUCENE-6367
  /**
   * A single accepting state looping on labels 0..0xff, compiled with the last constructor
   * argument {@code true}, must be reported as type {@code ALL}.
   */
  public void testBinaryAll() throws Exception {
    Automaton a = new Automaton();
    int state = a.createState();
    a.setAccept(state, true);
    a.addTransition(state, state, 0, 0xff);
    a.finishState();

    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
  }

  // LUCENE-6367
  /**
   * A single accepting state looping on labels 0..{@link Character#MAX_CODE_POINT}, compiled with
   * the last constructor argument {@code false}, must be reported as type {@code ALL}.
   */
  public void testUnicodeAll() throws Exception {
    Automaton a = new Automaton();
    int state = a.createState();
    a.setAccept(state, true);
    a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
    a.finishState();

    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
  }

  // LUCENE-6367
  /**
   * An automaton made from one string, compiled with the last constructor argument {@code true},
   * must be reported as type {@code SINGLE}.
   */
  public void testBinarySingleton() throws Exception {
    // This is just ascii so we can pretend it's binary:
    Automaton a = Automata.makeString("foobar");
    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
  }

  // LUCENE-6367
  /**
   * An automaton made from one random unicode string, compiled with the last constructor argument
   * {@code false}, must be reported as type {@code SINGLE}.
   */
  public void testUnicodeSingleton() throws Exception {
    Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
  }
}