/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util.fst; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.Set; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.fst.UpToTwoPositiveIntOutputs.TwoLongs; import static org.apache.lucene.util.fst.FSTTester.getRandomString; import static org.apache.lucene.util.fst.FSTTester.toIntsRef; public class TestFSTsMisc extends LuceneTestCase { private MockDirectoryWrapper dir; @Override public void setUp() throws Exception { super.setUp(); dir = newMockDirectory(); } @Override public void tearDown() throws Exception { // can be null if we force simpletext (funky, some kind of bug in test runner maybe) if (dir != null) dir.close(); super.tearDown(); } public void testRandomWords() throws IOException { testRandomWords(1000, LuceneTestCase.atLeast(random(), 2)); //testRandomWords(100, 1); } private void testRandomWords(int maxNumWords, int numIter) throws IOException { Random random = new Random(random().nextLong()); for(int iter=0;iter<numIter;iter++) { if (VERBOSE) { System.out.println("\nTEST: iter " + iter); } for(int inputMode=0;inputMode<2;inputMode++) { final int numWords = random.nextInt(maxNumWords+1); Set<IntsRef> termsSet = new HashSet<>(); IntsRef[] terms = new IntsRef[numWords]; while(termsSet.size() < numWords) { final String term = getRandomString(random); termsSet.add(toIntsRef(term, inputMode)); } doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()])); } } } private void doTest(int inputMode, IntsRef[] terms) throws IOException { Arrays.sort(terms); // Up to two positive ints, shared, generally but not // monotonically increasing { if (VERBOSE) { System.out.println("TEST: now test UpToTwoPositiveIntOutputs"); } final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true); final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<>(terms.length); long lastOutput = 0; for(int idx=0;idx<terms.length;idx++) { // Sometimes go backwards long value = lastOutput + TestUtil.nextInt(random(), -100, 1000); while(value < 0) { value = lastOutput + TestUtil.nextInt(random(), -100, 1000); } final Object output; if (random().nextInt(5) == 3) { long value2 = lastOutput + TestUtil.nextInt(random(), -100, 1000); while(value2 < 0) { value2 = lastOutput + TestUtil.nextInt(random(), -100, 1000); } List<Long> values = new ArrayList<>(); values.add(value); values.add(value2); output = values; } else { output = outputs.get(value); } pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) { @Override protected boolean outputsEqual(Object output1, Object output2) { if (output1 instanceof TwoLongs && output2 instanceof List) { TwoLongs twoLongs1 = (TwoLongs) output1; return Arrays.asList(new Long[] {twoLongs1.first, twoLongs1.second}).equals(output2); } else if (output2 instanceof TwoLongs && output1 instanceof List) { TwoLongs twoLongs2 = (TwoLongs) output2; return Arrays.asList(new Long[] {twoLongs2.first, twoLongs2.second}).equals(output1); } return output1.equals(output2); } }.doTest(false); } // ListOfOutputs(PositiveIntOutputs), generally but not // monotonically increasing { if (VERBOSE) { System.out.println("TEST: now test OneOrMoreOutputs"); } final PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); final ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs); final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<>(terms.length); long lastOutput = 0; for(int idx=0;idx<terms.length;idx++) { int outputCount = TestUtil.nextInt(random(), 1, 7); List<Long> values = new ArrayList<>(); for(int i=0;i<outputCount;i++) { // Sometimes go backwards long value = lastOutput + TestUtil.nextInt(random(), -100, 1000); while(value < 0) { value = lastOutput + TestUtil.nextInt(random(), -100, 1000); } values.add(value); lastOutput = value; } final Object output; if (values.size() == 1) { output = values.get(0); } else { output = values; } pairs.add(new FSTTester.InputOutput<>(terms[idx], output)); } new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false); } } public void testListOfOutputs() throws Exception { PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs); final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); // Add the same input more than once and the outputs // are merged: builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L); final FST<Object> fst = builder.finish(); Object output = Util.get(fst, new BytesRef("a")); assertNotNull(output); List<Long> outputList = outputs.asList(output); assertEquals(3, outputList.size()); assertEquals(1L, outputList.get(0).longValue()); assertEquals(3L, outputList.get(1).longValue()); assertEquals(0L, outputList.get(2).longValue()); output = Util.get(fst, new BytesRef("b")); assertNotNull(output); outputList = outputs.asList(output); assertEquals(1, outputList.size()); assertEquals(17L, outputList.get(0).longValue()); } public void testListOfOutputsEmptyString() throws Exception { PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs); final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); builder.add(scratch.get(), 0L); builder.add(scratch.get(), 1L); builder.add(scratch.get(), 17L); builder.add(scratch.get(), 1L); builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L); final FST<Object> fst = builder.finish(); Object output = Util.get(fst, new BytesRef("")); assertNotNull(output); List<Long> outputList = outputs.asList(output); assertEquals(4, outputList.size()); assertEquals(0L, outputList.get(0).longValue()); assertEquals(1L, outputList.get(1).longValue()); assertEquals(17L, outputList.get(2).longValue()); assertEquals(1L, outputList.get(3).longValue()); output = Util.get(fst, new BytesRef("a")); assertNotNull(output); outputList = outputs.asList(output); assertEquals(3, outputList.size()); assertEquals(1L, outputList.get(0).longValue()); assertEquals(3L, outputList.get(1).longValue()); assertEquals(0L, outputList.get(2).longValue()); output = Util.get(fst, new BytesRef("b")); assertNotNull(output); outputList = outputs.asList(output); assertEquals(1, outputList.size()); assertEquals(0L, outputList.get(0).longValue()); } }