/* * Copyright (c) 2010 Lockheed Martin Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.eurekastreams.commons.search.analysis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.jmock.Sequence; import org.jmock.integration.junit4.JUnit4Mockery; import org.jmock.lib.legacy.ClassImposteriser; import org.junit.Test; /** * Test fixture for PrefixedTokenRemoverAndExtractorTokenizer. */ public class PrefixedTokenRemoverAndExtractorTokenizerTest { /** * Context for mocking. */ private final JUnit4Mockery context = new JUnit4Mockery() { { setImposteriser(ClassImposteriser.INSTANCE); } }; /** * Token stream. */ private TokenStream tokenStream; /** * Reusable token. */ private final Token reusableToken = context.mock(Token.class, "reusableToken"); /** * Test next() with a token that has a prefix and the prefix in the middle of the word. * * @throws IOException * on error */ @Test public void testNextWithPrefixAndMidReplacement() throws IOException { List<String> extractedKeywords = new ArrayList<String>(); final Token token1 = new Token("FOObar123fooFOOfoo", 0, "FOObar123fooFOOfoo".length()); final Token token2 = new Token("hi", 0, "hi".length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(token1); tokens.add(token2); tokenStream = new TokenStreamTestHelper(tokens); PrefixedTokenRemoverAndExtractorTokenizer sut = new PrefixedTokenRemoverAndExtractorTokenizer(tokenStream, "FOO", "#", extractedKeywords); assertSame(token2, sut.next(reusableToken)); assertEquals("hi", token2.term()); assertEquals(0, token2.startOffset()); assertEquals("hi".length(), token2.endOffset()); assertEquals(1, extractedKeywords.size()); assertEquals("#bar123foo#foo", extractedKeywords.get(0)); } /** * Test next() with content that has a prefix but no replacement. * * @throws IOException * on error */ @Test public void testNextWithPrefixButNoReplacement() throws IOException { List<String> extractedKeywords = new ArrayList<String>(); final Token token1 = new Token("FOObar123fooFOfoo", 0, "FOObar123fooFOfoo".length()); final Token token2 = new Token("hi", 0, "hi".length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(token1); tokens.add(token2); tokenStream = new TokenStreamTestHelper(tokens); PrefixedTokenRemoverAndExtractorTokenizer sut = new PrefixedTokenRemoverAndExtractorTokenizer(tokenStream, "FOO", "#", extractedKeywords); assertSame(token2, sut.next(reusableToken)); assertEquals("hi", token2.term()); assertEquals(0, token2.startOffset()); assertEquals("hi".length(), token2.endOffset()); assertEquals(1, extractedKeywords.size()); assertEquals("#bar123fooFOfoo", extractedKeywords.get(0)); } /** * Test next() with content that doesn't have any prefix. * * @throws IOException * on error */ @Test public void testNextWithNoReplacementCharacter() throws IOException { runTest("FOO", "#", "FObar123fooOOfoo", "FObar123fooOOfoo"); } /** * Test next() with content that doesn't have any prefix, but does have a replacement. * * @throws IOException * on error */ @Test public void testNextWithReplacementButNoPrefix() throws IOException { runTest("FOO", "#", "bar123fooFOOfoo", "bar123foo#foo"); } /** * Perform a test with no token left. * * @throws IOException * on error */ @Test public void testNextWithNoToken() throws IOException { List<String> extractedKeywords = new ArrayList<String>(); List<Token> tokens = new ArrayList<Token>(); tokenStream = new TokenStreamTestHelper(tokens); PrefixedTokenRemoverAndExtractorTokenizer sut = new PrefixedTokenRemoverAndExtractorTokenizer(tokenStream, "FOO", "#", extractedKeywords); assertNull(null, sut.next(reusableToken)); } /** * Perform a test with an empty token, followed by a valid token. * * @throws IOException * on error */ @Test public void testNextWithEmptyThenValidToken() throws IOException { List<String> extractedKeywords = new ArrayList<String>(); final Token token1 = new Token("", 0, 0); final Token token2 = new Token("ABCD", 0, "ABCD".length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(token1); tokens.add(token2); tokenStream = new TokenStreamTestHelper(tokens); PrefixedTokenRemoverAndExtractorTokenizer sut = new PrefixedTokenRemoverAndExtractorTokenizer(tokenStream, "FOO", "#", extractedKeywords); assertSame(token2, sut.next(reusableToken)); assertEquals("ABCD", token2.term()); assertEquals(0, token2.startOffset()); assertEquals("ABCD".length(), token2.endOffset()); } /** * Perform a test with the input parameters. * * @param replaceFrom * the text to replace from * @param replaceTo * the text to replace to * @param input * the token value * @param expectedReturn * the expected token text * @throws IOException * on error */ private void runTest(final String replaceFrom, final String replaceTo, final String input, final String expectedReturn) throws IOException { final Sequence sequence = context.sequence("sequence-name"); List<String> extractedKeywords = new ArrayList<String>(); final Token returnToken = new Token(input, 0, input.length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(returnToken); tokenStream = new TokenStreamTestHelper(tokens); PrefixedTokenRemoverAndExtractorTokenizer sut = new PrefixedTokenRemoverAndExtractorTokenizer(tokenStream, replaceFrom, replaceTo, extractedKeywords); assertSame(returnToken, sut.next(reusableToken)); assertEquals(expectedReturn, returnToken.term()); assertEquals(0, returnToken.startOffset()); assertEquals(expectedReturn.length(), returnToken.endOffset()); } }