package com.twitter.common.text.combiner; import com.google.common.collect.ImmutableList; import com.twitter.common.text.filter.PunctuationFilter; import com.twitter.common.text.token.TokenStream; import com.twitter.common.text.tokenizer.LatinTokenizer; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; public class PunctuationExceptionCombinerTest { private TokenStream tokenized; @Before public void setup() { tokenized = new LatinTokenizer.Builder().setKeepPunctuation(true).build(); } @Test public void testPunctuationExceptions() { TokenStream stream = new PunctuationExceptionCombiner.Builder(tokenized).build(); stream.reset("I .. exceptions!! "); assertEquals(ImmutableList.of("I", ".", ".", "exceptions", "!", "!"), stream.toStringList()); stream.reset("I ♥♥ exceptions"); assertEquals(ImmutableList.of("I", "♥♥", "exceptions"), stream.toStringList()); stream.reset("I .♥♥. exceptions"); assertEquals(ImmutableList.of("I", ".", "♥♥", ".", "exceptions"), stream.toStringList()); } @Test public void testPunctuationFilterDoesNotRemoveExceptionChars() { TokenStream stream = new PunctuationFilter(new PunctuationExceptionCombiner.Builder(tokenized).build()); stream.reset("I .. exceptions!! "); assertEquals(ImmutableList.of("I", "exceptions"), stream.toStringList()); stream.reset("I ♥♥ exceptions!!"); assertEquals(ImmutableList.of("I", "♥♥", "exceptions"), stream.toStringList()); } @Test public void testAddingPunctuationExceptions() { TokenStream stream = new PunctuationExceptionCombiner.Builder(tokenized).addExceptionChars(".").build(); stream.reset("I .. exceptions!! "); assertEquals(ImmutableList.of("I", "..", "exceptions", "!", "!"), stream.toStringList()); stream = new PunctuationExceptionCombiner.Builder(tokenized).addExceptionChars(".!").build(); stream.reset("I ..♥♥ exceptions!! "); assertEquals(ImmutableList.of("I", "..♥♥", "exceptions", "!!"), stream.toStringList()); } }