/* * Copyright (c) 2010 Lockheed Martin Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.eurekastreams.commons.search.analysis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.jmock.integration.junit4.JUnit4Mockery; import org.jmock.lib.legacy.ClassImposteriser; import org.junit.Test; import edu.emory.mathcs.backport.java.util.Collections; /** * Test fixture for HashTagTokenizer. */ public class HashTagTokenizerTest { /** * Context for mocking. */ private final JUnit4Mockery context = new JUnit4Mockery() { { setImposteriser(ClassImposteriser.INSTANCE); } }; /** * Token stream. */ private TokenStream tokenStream; /** * Reusable token. */ private final Token reusableToken = context.mock(Token.class, "reusableToken"); /** * Test next() with a token that has a prefix and the prefix in the middle of the word. * * @throws IOException * on error */ @Test public void testNextWithPrefixAndMidReplacement() throws IOException { List<String> expectedList = new ArrayList<String>(); expectedList.add("#bar123foo"); expectedList.add("#bar123foo#foo"); List<String> expectedNonHashtags = new ArrayList<String>(); expectedNonHashtags.add("bar123foo"); expectedNonHashtags.add("foo"); runTest("#bar123foo#foo", "", expectedList, expectedNonHashtags, true, false); } /** * Test next() with content that doesn't have any prefix. * * @throws IOException * on error */ @Test public void testNextWithNoReplacementCharacter() throws IOException { runTest("FObar123fooOOfoo", "FObar123fooOOfoo", new ArrayList<String>(), new ArrayList<String>(), false, false); } /** * Test next() with content that doesn't have any prefix, but does have a replacement. * * @throws IOException * on error */ @Test public void testNextWithReplacementButNoPrefix() throws IOException { List<String> expectedList = new ArrayList<String>(); expectedList.add("bar123foo#foo"); List<String> expectedNonHashtags = new ArrayList<String>(); expectedNonHashtags.add("bar123foo"); expectedNonHashtags.add("foo"); runTest("bar123foo#foo", "", expectedList, expectedNonHashtags, true, false); } /** * Test next() with "foo_bar" - nonliteral. * * @throws IOException * on error */ @Test public void testNextWithUnderscoreNoHashNonLiteral() throws IOException { List<String> expectedList = new ArrayList<String>(); expectedList.add("foo_bars"); List<String> expectedNonHashtags = new ArrayList<String>(); expectedNonHashtags.add("foo_bars"); expectedNonHashtags.add("foo"); expectedNonHashtags.add("bars"); runTest("foo_bars", "", expectedList, expectedNonHashtags, true, false); } /** * Test next() with "foo_bar" - literal. * * @throws IOException * on error */ @Test public void testNextWithUnderscoreNoHashLiteral() throws IOException { List<String> expectedList = new ArrayList<String>(); List<String> expectedNonHashtags = new ArrayList<String>(); runTest("foo_bars", "foo_bars", expectedList, expectedNonHashtags, false, true); } /** * Test next() with content that has a prefix but no replacement. * * @throws IOException * on error */ @Test public void testNextWithPrefixButNoReplacement() throws IOException { List<String> expectedNonHashtags = new ArrayList<String>(); expectedNonHashtags.add("bar123fooFOfoo"); runTest("#bar123fooFOfoo", "", Collections.singletonList("#bar123fooFOfoo"), expectedNonHashtags, true, false); } /** * Test next() with content that has a prefix but no replacement - literal mode. * * @throws IOException * on error */ @Test public void testNextWithPrefixButNoReplacementInLiteralMode() throws IOException { runTest("#bar123foo#FOfoo", "", Collections.singletonList("#bar123foo#FOfoo"), new ArrayList<String>(), true, true); } /** * Test next() with content that has a prefix but no replacement. * * @throws IOException * on error */ @Test public void testNextWithUnderscoreReplacment() throws IOException { List<String> expectedExtractedKeywords = new ArrayList<String>(); expectedExtractedKeywords.add("#bar123_fooFOfoos"); expectedExtractedKeywords.add("bar123_fooFOfoos"); List<String> expectedNonHashtags = new ArrayList<String>(); expectedNonHashtags.add("bar123_fooFOfoos"); expectedNonHashtags.add("bar123"); expectedNonHashtags.add("fooFOfoos"); runTest("#bar123_fooFOfoos", "", expectedExtractedKeywords, expectedNonHashtags, true, false); } /** * Perform a test with no token left. * * @throws IOException * on error */ @Test public void testNextWithNoToken() throws IOException { List<Token> tokens = new ArrayList<Token>(); tokenStream = new TokenStreamTestHelper(tokens); List<String> extractedHashtags = new ArrayList<String>(); List<String> extractedNonHashtags = new ArrayList<String>(); HashTagTokenizer sut = new HashTagTokenizer(tokenStream, extractedHashtags, extractedNonHashtags, false); assertNull(null, sut.next(reusableToken)); } /** * Perform a test with an empty token, followed by a valid token. * * @throws IOException * on error */ @Test public void testNextWithEmptyThenValidToken() throws IOException { final Token token1 = new Token("", 0, 0); final Token token2 = new Token(HashTagTokenizer.HASHTAG_TEMPORARY_REPLACEMENT + "snutsBoo", 0, (HashTagTokenizer.HASHTAG_TEMPORARY_REPLACEMENT + "snutsBoo").length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(token1); tokens.add(token2); tokenStream = new TokenStreamTestHelper(tokens); List<String> extractedHashtags = new ArrayList<String>(); List<String> extractedNonHashtags = new ArrayList<String>(); HashTagTokenizer sut = new HashTagTokenizer(tokenStream, extractedHashtags, extractedNonHashtags, false); assertNull(sut.next(reusableToken)); assertEquals(1, extractedNonHashtags.size()); assertEquals("snutsBoo", extractedNonHashtags.get(0)); assertEquals("", token2.term()); assertEquals(1, extractedHashtags.size()); assertEquals("#snutsBoo", extractedHashtags.get(0)); } /** * Perform a test with the input parameters. * * @param input * the token value * @param expectedReturn * the expected token text * @param expectedExtractedKeywords * the expected hashtags extracted * @param expectedNonHashTags * the expected non-hashtags extracted * @param inExpectNullReturnToken * whether to expect the return token to be null * @param runInLiteralMode * whether to run in literal mode * @throws IOException * on error */ private void runTest(final String input, final String expectedReturn, final List<String> expectedExtractedKeywords, final List<String> expectedNonHashTags, final boolean inExpectNullReturnToken, final boolean runInLiteralMode) throws IOException { String text = input.replace("#", HashTagTokenizer.HASHTAG_TEMPORARY_REPLACEMENT); text = input.replace("_", HashTagTokenizer.UNDERSCORE_TEMPORARY_REPLACEMENT); final Token returnToken = new Token(text, 0, text.length()); List<Token> tokens = new ArrayList<Token>(); tokens.add(returnToken); tokenStream = new TokenStreamTestHelper(tokens); List<String> extractedHashtags = new ArrayList<String>(); List<String> extractedNonHashtags = new ArrayList<String>(); HashTagTokenizer sut = new HashTagTokenizer(tokenStream, extractedHashtags, extractedNonHashtags, runInLiteralMode); if (inExpectNullReturnToken) { assertNull(sut.next(reusableToken)); } else { assertSame(returnToken, sut.next(reusableToken)); } assertEquals(expectedReturn, returnToken.term()); assertEquals(0, returnToken.startOffset()); assertEquals(expectedReturn.length(), returnToken.endOffset()); if (expectedExtractedKeywords == null) { assertEquals(0, extractedHashtags.size()); } else { assertEquals(expectedExtractedKeywords.size(), extractedHashtags.size()); for (int i = 0; i < expectedExtractedKeywords.size(); i++) { assertEquals(expectedExtractedKeywords.get(i), extractedHashtags.get(i)); } } if (expectedNonHashTags == null) { assertEquals(0, extractedNonHashtags.size()); } else { assertEquals(expectedNonHashTags.size(), extractedNonHashtags.size()); for (int i = 0; i < expectedNonHashTags.size(); i++) { assertEquals(expectedNonHashTags.get(i), extractedNonHashtags.get(i)); } } } }