/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.junit.Test;

/**
 * Tests for {@link CompletionTokenStream}: each test feeds a whitespace-tokenized input
 * (optionally expanded through a {@link SynonymFilter}) into a completion stream and checks
 * the emitted terms, the payload attached to each term and the position increments.
 */
public class CompletionTokenStreamTest extends BaseTokenStreamTestCase {

  /** A single input token is emitted unchanged, carrying the configured payload. */
  @Test
  public void testBasic() throws Exception {
    String input = "mykeyword";
    CompletionTokenStream completionTokenStream =
        new CompletionTokenStream(whitespaceTokenizer(input));
    assertCompletions(completionTokenStream, input);
  }

  /**
   * With the first constructor flag set to {@code false} (separator preservation, going by the
   * test name) the three input tokens are emitted as one term with no separator between them.
   */
  @Test
  public void testWithNoPreserveSep() throws Exception {
    CompletionTokenStream completionTokenStream =
        new CompletionTokenStream(whitespaceTokenizer("mykeyword another keyword"), false, false, 100);
    assertCompletions(completionTokenStream, "mykeywordanotherkeyword");
  }

  /**
   * With the single-argument constructor, multiple input tokens are emitted as one term in
   * which the tokens are joined by {@link CompletionAnalyzer#SEP_LABEL}.
   */
  @Test
  public void testWithMultipleTokens() throws Exception {
    CompletionTokenStream completionTokenStream =
        new CompletionTokenStream(whitespaceTokenizer("mykeyword another keyword"));
    assertCompletions(completionTokenStream, joinWithSep("mykeyword", "another", "keyword"));
  }

  /** A single token with one synonym yields two terms: the original and the synonym. */
  @Test
  public void testWithSynonym() throws Exception {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
    SynonymFilter filter = new SynonymFilter(whitespaceTokenizer("mykeyword"), builder.build(), true);
    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
    assertCompletions(completionTokenStream, "mykeyword", "mysynonym");
  }

  /**
   * A synonym on the first of three tokens yields two separator-joined terms, one starting
   * with the original token and one starting with its synonym.
   */
  @Test
  public void testWithSynonyms() throws Exception {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
    SynonymFilter filter =
        new SynonymFilter(whitespaceTokenizer("mykeyword another keyword"), builder.build(), true);
    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter, true, false, 100);
    assertCompletions(
        completionTokenStream,
        joinWithSep("mykeyword", "another", "keyword"),
        joinWithSep("mysynonym", "another", "keyword"));
  }

  /**
   * Eight input tokens that each have exactly one synonym must expand to 2^8 = 256 terms,
   * every one of them non-empty and advancing the position by one.
   */
  @Test
  public void testValidNumberOfExpansions() throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
      builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 8; i++) {
      valueBuilder.append(i + 1);
      valueBuilder.append(" ");
    }
    SynonymFilter filter =
        new SynonymFilter(whitespaceTokenizer(valueBuilder.toString()), builder.build(), true);

    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
    completionTokenStream.setPayload(new BytesRef());
    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
    stream.reset();
    CompletionTokenStream.BytesRefBuilderTermAttribute attr =
        stream.addAttribute(CompletionTokenStream.BytesRefBuilderTermAttribute.class);
    PositionIncrementAttribute posAttr = stream.addAttribute(PositionIncrementAttribute.class);

    int maxPos = 0;
    int count = 0;
    while (stream.incrementToken()) {
      count++;
      assertNotNull(attr.getBytesRef());
      assertTrue(attr.getBytesRef().length > 0);
      maxPos += posAttr.getPositionIncrement();
    }
    // Follow the TokenStream consumer workflow: end() after the last token, then close().
    stream.end();
    stream.close();

    // JUnit's argument order is (expected, actual).
    assertEquals(256, count);
    assertEquals(count, maxPos);
  }

  /**
   * Creates a whitespace {@link MockTokenizer} (constructed with {@code true} as its second
   * argument, as every test here does) that reads from {@code input}.
   */
  private static Tokenizer whitespaceTokenizer(String input) {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(input));
    return tokenizer;
  }

  /** Joins {@code tokens} with {@link CompletionAnalyzer#SEP_LABEL} between neighbours. */
  private static String joinWithSep(String... tokens) {
    CharsRefBuilder joined = new CharsRefBuilder();
    for (int i = 0; i < tokens.length; i++) {
      if (i > 0) {
        joined.append((char) CompletionAnalyzer.SEP_LABEL);
      }
      joined.append(tokens[i]);
    }
    return joined.toCharsRef().toString();
  }

  /**
   * Sets the payload {@code "payload"} on {@code completionTokenStream} and asserts that it
   * emits exactly {@code expectedTerms}, each carrying that payload (surfaced through the type
   * attribute by {@link PayloadAttrToTypeAttrFilter}) and a position increment of 1.
   */
  private static void assertCompletions(
      CompletionTokenStream completionTokenStream, String... expectedTerms) throws IOException {
    BytesRef payload = new BytesRef("payload");
    completionTokenStream.setPayload(payload);
    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);

    String[] expectedTypes = new String[expectedTerms.length];
    int[] expectedPosIncrements = new int[expectedTerms.length];
    for (int i = 0; i < expectedTerms.length; i++) {
      expectedTypes[i] = payload.utf8ToString();
      expectedPosIncrements[i] = 1;
    }
    assertTokenStreamContents(
        stream, expectedTerms, null, null, expectedTypes, expectedPosIncrements, null, null);
  }

  /**
   * Test-only filter that copies each token's payload, decoded as UTF-8, into its type
   * attribute so payloads can be checked through the {@code types} argument of
   * {@code assertTokenStreamContents}.
   */
  public static final class PayloadAttrToTypeAttrFilter extends TokenFilter {
    private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
    private final TypeAttribute type = addAttribute(TypeAttribute.class);

    protected PayloadAttrToTypeAttrFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        // we move them over so we can assert them more easily in the tests
        type.setType(payload.getPayload().utf8ToString());
        return true;
      }
      return false;
    }
  }
}