package chipmunk.segmenter;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
public class RulebasedSegmenter extends Segmenter {
private static final long serialVersionUID = 1L;
@Override
public SegmentationReading segment(Word word) {
List<String> segments = new LinkedList<>();
List<String> tags = new LinkedList<>();
StringBuilder sb = new StringBuilder();
String current_tag = null;
String form = word.getWord();
for (int i=0; i<form.length(); i++) {
char c = form.charAt(i);
String new_tag;
if (Character.isLetter(c)) {
new_tag = TagSet.ALPHA;
} else if (Character.isDigit(c)) {
new_tag = TagSet.NUMBER;
} else {
new_tag = TagSet.SPECIAL;
}
if (current_tag != null) {
if (!current_tag.equals(new_tag) || new_tag == TagSet.SPECIAL) {
assert sb.length() > 0;
segments.add(sb.toString());
tags.add(current_tag);
sb.setLength(0);
}
}
sb.append(c);
current_tag = new_tag;
}
if (current_tag != null) {
segments.add(sb.toString());
tags.add(current_tag);
}
return new SegmentationReading(segments, tags);
}
}