/**
 *
 */
package com.maalaang.omtwitter.text;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Immutable {@link OMTweetToken} implementation that stores a token's type, its begin and
 * end positions, and its original text, and derives a normalized form of the text on demand.
 *
 * <p>NOTE(review): {@code begin}/{@code end} are stored and returned as given; whether they
 * are character offsets and whether {@code end} is exclusive is decided by the caller that
 * builds the token — confirm against the tokenizer.
 *
 * @author Sangwon Park
 */
public class OMTweetToken_Impl implements OMTweetToken {

    /** Regex matching a run of three or more repetitions of the same character. */
    public final static String REGEX_DUPLICATE = "(.)\\1{2,}";

    /** Compiled form of {@link #REGEX_DUPLICATE}; compiled once because it is used per token. */
    private static final Pattern PATTERN_DUPLICATE = Pattern.compile(REGEX_DUPLICATE);

    /** Matches a single ASCII digit; every match is masked with {@code "N"}. */
    private static final Pattern PATTERN_DIGIT = Pattern.compile("[0-9]");

    private final int type;
    private final int begin;
    private final int end;
    private final String text;

    /**
     * Creates a token.
     *
     * @param type  token type; {@link #getNormalizedText()} handles {@code TOKEN_TYPE_USER},
     *              {@code TOKEN_TYPE_URL}, {@code TOKEN_TYPE_HASHTAG} and {@code TOKEN_TYPE_NORMAL}
     * @param begin begin position of the token
     * @param end   end position of the token
     * @param text  original token text
     */
    public OMTweetToken_Impl(int type, int begin, int end, String text) {
        this.type = type;
        this.begin = begin;
        this.end = end;
        this.text = text;
    }

    /** @return the token type passed to the constructor */
    public int getType() {
        return type;
    }

    /** @return the begin position passed to the constructor */
    public int getBegin() {
        return begin;
    }

    /** @return the end position passed to the constructor */
    public int getEnd() {
        return end;
    }

    /** @return the original, un-normalized token text */
    public String getText() {
        return text;
    }

    /**
     * Returns the normalized form of this token, depending on its type:
     * <ul>
     *   <li>user: the constant {@code NORMALIZED_TEXT_USER};</li>
     *   <li>URL: the constant {@code NORMALIZED_TEXT_URL};</li>
     *   <li>hashtag: the text lower-cased, with the first {@code '#'} removed and every
     *       ASCII digit replaced by {@code "N"};</li>
     *   <li>normal: the text lower-cased, with every run of three or more identical
     *       characters shortened to two, and then every ASCII digit replaced by
     *       {@code "N"}.</li>
     * </ul>
     *
     * @return the normalized text
     * @throws IllegalStateException if the token type is none of the four handled types
     * @throws NullPointerException  if the type is hashtag or normal and the text is {@code null}
     */
    public String getNormalizedText() {
        switch (type) {
        case TOKEN_TYPE_USER:
            return NORMALIZED_TEXT_USER;
        case TOKEN_TYPE_URL:
            return NORMALIZED_TEXT_URL;
        case TOKEN_TYPE_HASHTAG:
            return normalizeHashtag(text);
        case TOKEN_TYPE_NORMAL:
            return normalizeWord(text);
        default:
            throw new IllegalStateException("Unknown token type: " + type);
        }
    }

    /** Lower-cases a hashtag, strips its first '#' and masks ASCII digits with "N". */
    private static String normalizeHashtag(String hashtag) {
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String lowered = hashtag.toLowerCase(Locale.ROOT).replaceFirst("#", "");
        return maskDigits(lowered);
    }

    /** Lower-cases a word, shortens runs of 3+ identical characters to two and masks digits. */
    private static String normalizeWord(String word) {
        String lowered = word.toLowerCase(Locale.ROOT);
        // "$1$1" keeps exactly two copies of the repeated character. Using the captured
        // group (rather than a fixed two-char substring) also keeps two copies when the
        // repeated character is a surrogate pair, e.g. an emoji.
        Matcher duplicates = PATTERN_DUPLICATE.matcher(lowered);
        String collapsed = duplicates.replaceAll("$1$1");
        // Digits are masked after collapsing, so "1111" becomes "NN".
        return maskDigits(collapsed);
    }

    /** Replaces every ASCII digit with "N" (regex replacement, not a literal one). */
    private static String maskDigits(String value) {
        return PATTERN_DIGIT.matcher(value).replaceAll("N");
    }

    /* (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "OMTweetTokenDefault [type=" + type + ", begin=" + begin
                + ", end=" + end + ", text=" + text + ", normalizedText="
                + getNormalizedText() + "]";
    }
}