/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search.suggest.fst; import java.util.*; import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; public class WFSTCompletionTest extends LuceneTestCase { public void testBasic() throws Exception { Input keys[] = new Input[] { new Input("foo", 50), new Input("bar", 10), new Input("barbar", 12), new Input("barbara", 6) }; Random random = new Random(random().nextLong()); Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst"); suggester.build(new InputArrayIterator(keys)); // top N of 2, but only foo is available List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("f", random), false, 2); assertEquals(1, results.size()); assertEquals("foo", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // make sure we don't get a dup exact suggestion: results = suggester.lookup(TestUtil.stringToCharSequence("foo", random), false, 2); assertEquals(1, results.size()); assertEquals("foo", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // top N of 1 for 'bar': we return this even though barbar is higher results = suggester.lookup(TestUtil.stringToCharSequence("bar", random), false, 1); assertEquals(1, results.size()); assertEquals("bar", results.get(0).key.toString()); assertEquals(10, results.get(0).value, 0.01F); // top N Of 2 for 'b' results = suggester.lookup(TestUtil.stringToCharSequence("b", random), false, 2); assertEquals(2, results.size()); assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); // top N of 3 for 'ba' results = suggester.lookup(TestUtil.stringToCharSequence("ba", random), false, 3); assertEquals(3, results.size()); assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); assertEquals("barbara", results.get(2).key.toString()); assertEquals(6, results.get(2).value, 0.01F); tempDir.close(); } public void testExactFirst() throws Exception { Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", true); suggester.build(new InputArrayIterator(new Input[] { new Input("x y", 20), new Input("x", 2), })); for(int topN=1;topN<4;topN++) { List<LookupResult> results = suggester.lookup("x", false, topN); assertEquals(Math.min(topN, 2), results.size()); assertEquals("x", results.get(0).key); assertEquals(2, results.get(0).value); if (topN > 1) { assertEquals("x y", results.get(1).key); assertEquals(20, results.get(1).value); } } tempDir.close(); } public void testNonExactFirst() throws Exception { Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false); suggester.build(new InputArrayIterator(new Input[] { new Input("x y", 20), new Input("x", 2), })); for(int topN=1;topN<4;topN++) { List<LookupResult> results = suggester.lookup("x", false, topN); assertEquals(Math.min(topN, 2), results.size()); assertEquals("x y", results.get(0).key); assertEquals(20, results.get(0).value); if (topN > 1) { assertEquals("x", results.get(1).key); assertEquals(2, results.get(1).value); } } tempDir.close(); } public void testRandom() throws Exception { int numWords = atLeast(1000); final TreeMap<String,Long> slowCompletor = new TreeMap<>(); final TreeSet<String> allPrefixes = new TreeSet<>(); Input[] keys = new Input[numWords]; for (int i = 0; i < numWords; i++) { String s; while (true) { // TODO: would be nice to fix this slowCompletor/comparator to // use full range, but we might lose some coverage too... s = TestUtil.randomSimpleString(random()); if (!slowCompletor.containsKey(s)) { break; } } for (int j = 1; j < s.length(); j++) { allPrefixes.add(s.substring(0, j)); } // we can probably do Integer.MAX_VALUE here, but why worry. int weight = random().nextInt(1<<24); slowCompletor.put(s, (long)weight); keys[i] = new Input(s, weight); } Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false); suggester.build(new InputArrayIterator(keys)); assertEquals(numWords, suggester.getCount()); Random random = new Random(random().nextLong()); for (String prefix : allPrefixes) { final int topN = TestUtil.nextInt(random, 1, 10); List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence(prefix, random), false, topN); // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion final List<LookupResult> matches = new ArrayList<>(); // TODO: could be faster... but it's slowCompletor for a reason for (Map.Entry<String,Long> e : slowCompletor.entrySet()) { if (e.getKey().startsWith(prefix)) { matches.add(new LookupResult(e.getKey(), e.getValue().longValue())); } } assertTrue(matches.size() > 0); Collections.sort(matches, new Comparator<LookupResult>() { @Override public int compare(LookupResult left, LookupResult right) { int cmp = Float.compare(right.value, left.value); if (cmp == 0) { return left.compareTo(right); } else { return cmp; } } }); if (matches.size() > topN) { matches.subList(topN, matches.size()).clear(); } assertEquals(matches.size(), r.size()); for(int hit=0;hit<r.size();hit++) { //System.out.println(" check hit " + hit); assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString()); assertEquals(matches.get(hit).value, r.get(hit).value, 0f); } } tempDir.close(); } public void test0ByteKeys() throws Exception { BytesRef key1 = new BytesRef(4); key1.length = 4; BytesRef key2 = new BytesRef(3); key1.length = 3; Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false); suggester.build(new InputArrayIterator(new Input[] { new Input(key1, 50), new Input(key2, 50), })); tempDir.close(); } public void testEmpty() throws Exception { Directory tempDir = getDirectory(); WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false); suggester.build(new InputArrayIterator(new Input[0])); assertEquals(0, suggester.getCount()); List<LookupResult> result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); tempDir.close(); } private Directory getDirectory() { return newDirectory(); } }