package com.antbrains.wordseg; public class Token { public static enum Type { UNKNOWN, ALPHA, NUMBER, PUNCT, WHITESPACE, CWORD, } private String normalizedText; private Type type; private String context; private int beginIndex; private int endIndex; private int posInc = 1; public int getPosInc() { return posInc; } public void setPosInc(int posInc) { this.posInc = posInc; } public Token(String context, int beginIndex, int endIndex) { this.context = context; this.beginIndex = beginIndex; this.endIndex = endIndex; } public Token(String text, String context, int beginIndex, int endIndex, Type attr) { this.normalizedText = text; this.context = context; this.beginIndex = beginIndex; this.endIndex = endIndex; this.type = attr; } public String getNormalizedText() { if (normalizedText == null) { normalizedText = context.substring(beginIndex, endIndex); } return normalizedText; } public String getOrigText() { return context.substring(beginIndex, endIndex); } public String getContext() { return context; } public int getBeginIndex() { return beginIndex; } public int getEndIndex() { return endIndex; } public int getLength() { return endIndex - beginIndex; } public Type getType() { return type; } public void setType(Type type) { this.type = type; } /** * 用于合并Token之用 * * @see com.qunar.nlp.chinese.ChineseSegmenter#segment */ public void setEndIndex(int endIndex) { this.endIndex = endIndex; } @Override public String toString() { return getOrigText(); } }