package com.twitter.common.text.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import org.apache.lucene.util.AttributeSource; import org.junit.Assert; import org.junit.Test; import com.twitter.common.text.token.attribute.TokenType; import com.twitter.common.text.token.attribute.TokenTypeAttribute; /** * Test for TokenTypeAttributeSerializer. * @author Ugo Di Girolamo */ public class TokenTypeAttributeSerializerTest { private byte[] serialize(TokenType tokenType) throws IOException { AttributeSource attributeSource = new AttributeSource(); TokenTypeAttribute tokenTypeAttribute = attributeSource.addAttribute( TokenTypeAttribute.class); tokenTypeAttribute.setType(tokenType); TokenTypeAttributeSerializer serializer = new TokenTypeAttributeSerializer(); serializer.initialize(attributeSource, TokenStreamSerializer.CURRENT_VERSION); ByteArrayOutputStream output = new ByteArrayOutputStream(); TokenStreamSerializer.AttributeOutputStream outputStream = new TokenStreamSerializer.AttributeOutputStream(output); serializer.serialize(outputStream); return output.toByteArray(); } private TokenType deserialize(byte[] serialized) throws IOException { AttributeSource attributeSource = new AttributeSource(); TokenTypeAttribute tokenTypeAttribute = attributeSource.addAttribute( TokenTypeAttribute.class); TokenTypeAttributeSerializer serializer = new TokenTypeAttributeSerializer(); serializer.initialize(attributeSource, TokenStreamSerializer.CURRENT_VERSION); ByteArrayInputStream input = new ByteArrayInputStream(serialized); TokenStreamSerializer.AttributeInputStream inputStream = new TokenStreamSerializer.AttributeInputStream(input); serializer.deserialize(inputStream, null); return tokenTypeAttribute.getType(); } /** * Test that serializing and then deserializing a TokenType we get the original token back. */ @Test public void testSerializeAndDeserialize() throws Exception { for (TokenType tokenType : TokenType.values()) { byte[] serialized = serialize(tokenType); Assert.assertEquals(tokenType, deserialize(serialized)); } } /** * Test that the serialized token type uses exactly one byte. */ @Test public void testTokenTypesFitInOneByte() throws Exception { for (TokenType tokenType : TokenType.values()) { byte[] serialized = serialize(tokenType); Assert.assertEquals(1, serialized.length); } } // CHECKSTYLE:OFF MagicNumber /** * Since we serialize the ordinal of the TokenTypes, we need to guarantee that ordinal numbers * don't change. * So if you add a new TokenType, come here and update this test to include your TokenType. * Do NOT remove TokenTypes. */ @Test public void testKnownEnumNames() throws Exception { Assert.assertEquals(0, TokenType.TOKEN.ordinal()); Assert.assertEquals(1, TokenType.PUNCTUATION.ordinal()); Assert.assertEquals(2, TokenType.HASHTAG.ordinal()); Assert.assertEquals(3, TokenType.USERNAME.ordinal()); Assert.assertEquals(4, TokenType.EMOTICON.ordinal()); Assert.assertEquals(5, TokenType.URL.ordinal()); Assert.assertEquals(6, TokenType.STOCK.ordinal()); Assert.assertEquals(7, TokenType.values().length); } // CHECKSTYLE:ON MagicNumber }