// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.text.token;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.List;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.twitter.common.text.token.attribute.TokenType;
/**
 * Unit tests for {@link TokenizedCharSequence} and its {@code Builder}.
 *
 * <p>Throughout these tests tokens are registered as {@code addToken(offset, length)}: for
 * example {@code addToken(6, 8, ...)} over {@code "test, #hashtag, @username."} is expected to
 * yield the token string {@code "#hashtag"}.
 */
public class TokenizedCharSequenceTest {

  /** Building from a {@code null} text is expected to fail with a NullPointerException. */
  @Test(expected = NullPointerException.class)
  public void testNullConstructor() {
    new TokenizedCharSequence.Builder(null).build();
  }

  /** Building from an empty text must complete without throwing. */
  @Test
  public void testEmptyConstructor() {
    // An empty text is a legal input; there is nothing further to assert here.
    new TokenizedCharSequence.Builder("").build();
  }

  /**
   * Checks equals(), toString() and hashCode() across three situations: identical text and
   * tokens, different text and tokens, and identical characters supplied through a non-String
   * source.
   */
  @Test
  public void testTokenizedCharSequence() {
    // Two independently built instances over exactly the same text and tokens.
    final String twoWords = "test test";
    TokenizedCharSequence first = new TokenizedCharSequence.Builder(twoWords)
        .addToken(0, 4)
        .addToken(5, 4)
        .build();
    TokenizedCharSequence second = new TokenizedCharSequence.Builder(twoWords)
        .addToken(0, 4)
        .addToken(5, 4)
        .build();
    assertFalse(first == second);
    assertEquals(first, second);
    assertEquals(first.toString(), second.toString());
    assertEquals(first.hashCode(), second.hashCode());

    // A longer text with one extra token must differ in every respect.
    final String threeWords = "test test test";
    TokenizedCharSequence longer = new TokenizedCharSequence.Builder(threeWords)
        .addToken(0, 4)
        .addToken(5, 4)
        .addToken(10, 4)
        .build();
    assertTrue(!first.equals(longer));
    String firstString = first.toString();
    String longerString = longer.toString();
    assertTrue(!firstString.equals(longerString));
    int firstHash = first.hashCode();
    int longerHash = longer.hashCode();
    assertTrue(firstHash != longerHash);

    // Same characters, but handed to the builder as a StringBuffer instead of a String.
    StringBuffer buffered = new StringBuffer("test test");
    TokenizedCharSequence fromBuffer = new TokenizedCharSequence.Builder(buffered)
        .addToken(0, 4)
        .addToken(5, 4)
        .build();
    assertFalse(first == fromBuffer);
    assertEquals(first, fromBuffer);
    assertEquals(first.toString(), fromBuffer.toString());
    assertEquals(first.hashCode(), fromBuffer.hashCode());
  }

  /**
   * Checks that getTokensOf() and getTokenStringsOf() return only the tokens of the requested
   * {@link TokenType}s, in the order they were added.
   */
  @Test
  public void testGetTokensOf() {
    final String tweet = "test, #hashtag, @username.";
    TokenizedCharSequence sequence = new TokenizedCharSequence.Builder(tweet)
        .addToken(0, 4, TokenType.TOKEN)
        .addToken(4, 1, TokenType.PUNCTUATION)
        .addToken(6, 8, TokenType.HASHTAG)
        .addToken(14, 1, TokenType.PUNCTUATION)
        .addToken(16, 9, TokenType.USERNAME)
        .addToken(25, 1, TokenType.PUNCTUATION)
        .build();

    // One plain token, one hashtag and one username were added ...
    int nonPunctuationCount =
        sequence.getTokensOf(TokenType.TOKEN, TokenType.HASHTAG, TokenType.USERNAME).size();
    assertEquals(3, nonPunctuationCount);

    // ... along with three punctuation marks.
    int punctuationCount = sequence.getTokensOf(TokenType.PUNCTUATION).size();
    assertEquals(3, punctuationCount);

    List<String> hashtagStrings = sequence.getTokenStringsOf(TokenType.HASHTAG);
    assertEquals(ImmutableList.of("#hashtag"), hashtagStrings);

    List<String> hashtagAndUserStrings =
        sequence.getTokenStringsOf(TokenType.HASHTAG, TokenType.USERNAME);
    assertEquals(ImmutableList.of("#hashtag", "@username"), hashtagAndUserStrings);
  }
}