package ecologylab.bigsemantics.model.text;
import java.util.regex.Matcher;
import ecologylab.bigsemantics.html.utils.StringBuilderUtils;
import ecologylab.bigsemantics.model.TextToken;
import ecologylab.generic.StringTools;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.annotations.simpl_inherit;
/**
 * A {@link TextToken} that can additionally resolve itself to a {@link Term}.
 * <p>
 * The resolved term is computed lazily and cached in {@link #term}. Note that
 * {@link #term()} and {@link #xterm()} share that single cache field, so whichever
 * of the two is invoked first determines the value both return afterwards.
 */
@simpl_inherit
public class SemanticTextToken extends TextToken
{
	/** Lazily resolved term for this token; {@code null} until first requested. */
	protected Term term;

	/** No-argument constructor; delegates to the superclass no-argument constructor. */
	public SemanticTextToken ()
	{
		super();
	}

	/**
	 * Delegates directly to the matching {@link TextToken} constructor; all arguments
	 * are passed through unchanged.
	 */
	public SemanticTextToken ( String s, ParsedURL h, int fontStyle, int tokenFontSize,
			String delims, int faceIndex, int under )
	{
		super(s, h, fontStyle, tokenFontSize, delims, faceIndex, under);
	}

	/**
	 * Delegates directly to the matching {@link TextToken} constructor; all arguments
	 * are passed through unchanged.
	 */
	public SemanticTextToken ( String s, String delims, ParsedURL h, int style, int fontSize,
			int faceIndex )
	{
		super(s, delims, h, style, fontSize, faceIndex);
	}

	/**
	 * Delegates directly to the matching {@link TextToken} constructor; all arguments
	 * are passed through unchanged.
	 */
	public SemanticTextToken ( String s, String delims, ParsedURL h )
	{
		super(s, delims, h);
	}

	/**
	 * Delegates to the {@link TextToken} constructor that takes another token.
	 *
	 * @param previousToken token handed to the superclass constructor
	 */
	public SemanticTextToken ( TextToken previousToken )
	{
		super(previousToken);
	}

	/**
	 * Appends the first region of {@code input} matched by {@code TermVector.WORD_REGEX}
	 * to {@code termBuffy}, then passes the buffer to {@code StringTools.toLowerCase}.
	 * If the pattern does not match anywhere in {@code input}, the buffer is left untouched.
	 * <p>
	 * NOTE(review): the lower-casing call receives the whole buffer, not only the part
	 * appended here, and its return value is ignored -- presumably it converts the buffer
	 * in place; confirm against {@code StringTools}.
	 *
	 * @param input     text to search for a word
	 * @param termBuffy buffer that receives the matched word
	 */
	public static void noPunctuation(String input, StringBuilder termBuffy)
	{
		Matcher m = TermVector.WORD_REGEX.matcher(input);
		if (m.find())
		{
			termBuffy.append(input, m.start(), m.end());
			StringTools.toLowerCase(termBuffy);
		}
	}

	/**
	 * Returns the term for this token, resolving and caching it on first use.
	 * <p>
	 * The token's string is reduced to its first word via
	 * {@link #noPunctuation(String, StringBuilder)}. A non-empty result is looked up with
	 * {@code TermDictionary.getTermForWord}; an empty result yields
	 * {@code TermDictionary.STOP_WORD}.
	 *
	 * @return the cached or newly resolved term
	 */
	public Term term()
	{
		if (term == null)
		{
			StringBuilder termBuffy = StringBuilderUtils.acquire();
			try
			{
				noPunctuation(string, termBuffy);
				if (termBuffy.length() > 0)
				{
					term = TermDictionary.getTermForWord(termBuffy);
				}
				else
				{
					term = TermDictionary.STOP_WORD;
				}
			}
			finally
			{
				// Hand the acquired buffer back even when extraction or lookup throws.
				StringBuilderUtils.release(termBuffy);
			}
		}
		return term;
	}

	/**
	 * Returns the term for this token, resolving it on first use by passing the value of
	 * {@code getString()} to {@code TermDictionary.getTermForUnsafeWord}.
	 * <p>
	 * The result is stored in the same field that {@link #term()} uses, so once either
	 * method has populated it, both return that value.
	 *
	 * @return the cached or newly resolved term
	 */
	public Term xterm()
	{
		if (term == null)
		{
			term = TermDictionary.getTermForUnsafeWord(this.getString());
		}
		return term;
	}
}