package org.apache.lucene.search.suggest.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;

import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.fst.FSTLookup;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.suggest.LookupBenchmarkTest;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;

/**
 * Unit tests for {@link FSTLookup}.
 */
public class FSTLookupTest extends LuceneTestCase {
  /**
   * Shorthand factory for a {@link TermFreq}.
   *
   * @param t the term
   * @param v the value passed as the second constructor argument
   * @return a new {@link TermFreq} built from {@code t} and {@code v}
   */
  public static TermFreq tf(String t, float v) {
    return new TermFreq(t, v);
  }

  /** Lookup under test; rebuilt in {@link #setUp()} and replaced by some tests. */
  private FSTLookup lookup;

  /**
   * Runs the superclass set-up, then builds a default-constructed
   * {@link FSTLookup} from {@link #evalKeys()}.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();

    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(evalKeys()));
  }

  /**
   * Returns the fixed set of terms used by most tests in this class.
   * A few entries ("one", "four", "fourier", "fourty") carry 0.5 while
   * all others carry 1.
   */
  private TermFreq[] evalKeys() {
    final TermFreq[] keys = new TermFreq[] {
        tf("one", 0.5f),
        tf("oneness", 1),
        tf("onerous", 1),
        tf("onesimus", 1),
        tf("two", 1),
        tf("twofold", 1),
        tf("twonk", 1),
        tf("thrive", 1),
        tf("through", 1),
        tf("threat", 1),
        tf("three", 1),
        tf("foundation", 1),
        tf("fourblah", 1),
        tf("fourteen", 1),
        tf("four", 0.5f),
        tf("fourier", 0.5f),
        tf("fourty", 0.5f),
        tf("xo", 1),
    };
    return keys;
  }

  /** A single-result lookup of "two" must return exactly "two/1.0". */
  public void testExactMatchHighPriority() throws Exception {
    assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0");
  }

  /** A two-result lookup of "one" must list "one/0.0" ahead of "oneness/1.0". */
  public void testExactMatchLowPriority() throws Exception {
    assertMatchEquals(lookup.lookup("one", true, 2),
        "one/0.0",
        "oneness/1.0");
  }

  /**
   * Checks result ordering for several requested counts, first with the
   * default lookup and then with one built via {@code new FSTLookup(10, false)}.
   * NOTE(review): the second constructor argument presumably turns off
   * exact-match promotion (the expectations below rely on that) - confirm
   * against the FSTLookup constructor.
   */
  public void testRequestedCount() throws Exception {
    // 'one' is promoted after collecting two higher ranking results.
    assertMatchEquals(lookup.lookup("one", true, 2),
        "one/0.0",
        "oneness/1.0");

    // 'one' is at the top after collecting all alphabetical results.
    assertMatchEquals(lookup.lookup("one", false, 2),
        "one/0.0",
        "oneness/1.0");

    // 'four' is collected in a bucket and then again as an exact match.
    assertMatchEquals(lookup.lookup("four", true, 2),
        "four/0.0",
        "fourblah/1.0");

    // Check reordering of exact matches.
    assertMatchEquals(lookup.lookup("four", true, 4),
        "four/0.0",
        "fourblah/1.0",
        "fourteen/1.0",
        "fourier/0.0");

    lookup = new FSTLookup(10, false);
    lookup.build(new TermFreqArrayIterator(evalKeys()));

    // 'one' is not promoted after collecting two higher ranking results.
    assertMatchEquals(lookup.lookup("one", true, 2),
        "oneness/1.0",
        "onerous/1.0");

    // 'one' is at the top after collecting all alphabetical results.
    assertMatchEquals(lookup.lookup("one", false, 2),
        "one/0.0",
        "oneness/1.0");
  }

  /** A prefix that matches no key ("xyz") must yield no results. */
  public void testMiss() throws Exception {
    assertMatchEquals(lookup.lookup("xyz", true, 1));
  }

  /**
   * Same miss check as {@link #testMiss()}, but with {@code false} as the
   * second lookup argument: the result list must be empty.
   */
  public void testAlphabeticWithWeights() throws Exception {
    assertEquals(0, lookup.lookup("xyz", false, 1).size());
  }

  /**
   * Requesting {@link Integer#MAX_VALUE} results for "one" must return all
   * four matching keys, with "one/0.0" last.
   */
  public void testFullMatchList() throws Exception {
    assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE),
        "oneness/1.0",
        "onerous/1.0",
        "onesimus/1.0",
        "one/0.0");
  }

  /**
   * Builds a lookup from {@link LookupBenchmarkTest#readTop50KWiki()} and
   * verifies that every input term can be fetched with {@code get} and is
   * the first result of a single-result lookup of itself.
   */
  public void testMultilingualInput() throws Exception {
    List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();

    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(input));

    for (TermFreq tf : input) {
      assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
      assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
    }
  }

  /** A lookup built from an empty input must return no results for "". */
  public void testEmptyInput() throws Exception {
    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(new TermFreq[0]));

    assertMatchEquals(lookup.lookup("", true, 10));
  }

  /**
   * Builds a lookup from 5000 random numeric strings and checks, for every
   * proper prefix of every term, that each returned key starts with that
   * prefix.
   */
  public void testRandom() throws Exception {
    List<TermFreq> freqs = new ArrayList<TermFreq>();
    Random rnd = random;
    for (int i = 0; i < 5000; i++) {
      freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100)));
    }

    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

    for (TermFreq tf : freqs) {
      final String term = tf.term;
      // Prefix lengths 1 .. length-1: the full term itself is not used as a prefix here.
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
          assertTrue("Result '" + lr.key + "' does not start with prefix '" + prefix + "'",
              lr.key.startsWith(prefix));
        }
      }
    }
  }

  /**
   * Asserts that the string forms of {@code res} equal {@code expected},
   * element by element and in order. On mismatch a two-column
   * "Expected / Result" table is written to {@code System.err} and used as
   * the failure message.
   *
   * @param res      the lookup results; each is compared via its {@code toString()}
   * @param expected the expected string forms, in the expected order
   */
  private void assertMatchEquals(List<LookupResult> res, String... expected) {
    String[] result = new String[res.size()];
    for (int i = 0; i < res.size(); i++) {
      result[i] = res.get(i).toString();
    }

    if (Arrays.equals(expected, result)) {
      return;
    }

    final String expectedHeader = "Expected";
    final String resultHeader = "Result";

    // Never let the column be narrower than the headers: short data would
    // misalign the table, and a width of 0 would make the format string invalid.
    int colLen = Math.max(maxLen(expected), maxLen(result));
    colLen = Math.max(colLen, Math.max(expectedHeader.length(), resultHeader.length()));

    StringBuilder b = new StringBuilder();
    String format = "%" + colLen + "s " + "%" + colLen + "s\n";
    b.append(String.format(Locale.ENGLISH, format, expectedHeader, resultHeader));

    // Pad the shorter side with "--" so both columns have the same number of rows.
    int rows = Math.max(result.length, expected.length);
    for (int i = 0; i < rows; i++) {
      b.append(String.format(Locale.ENGLISH, format,
          i < expected.length ? expected[i] : "--",
          i < result.length ? result[i] : "--"));
    }

    System.err.println(b.toString());
    fail("Expected different output:\n" + b.toString());
  }

  /**
   * Returns the length of the longest string in {@code result}, or 0 for an
   * empty array.
   */
  private int maxLen(String[] result) {
    int len = 0;
    for (String s : result) {
      len = Math.max(len, s.length());
    }
    return len;
  }
}