package tathya.text.tokenizer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Tokenizer that breaks the input text into word tokens.
 *
 * <p>Words are the maximal runs of characters between delimiters. The
 * delimiters are: space, comma, tab, newline, carriage return, form feed,
 * period, semicolon, colon, double quote, single quote and hyphen.
 *
 * @author anand
 */
public class WordTokenizer implements ITokenizer {

    /**
     * One or more consecutive delimiter characters. Compiled once because the
     * expression never changes between calls.
     */
    private static final Pattern DELIMITERS =
            Pattern.compile("[ ,\\t\\n\\r\\f\\.;:\"\'-]+");

    /**
     * Splits the given text into word tokens.
     *
     * <p>Empty tokens are never returned: text that starts with a delimiter
     * does not produce a leading empty string, and empty or delimiter-only
     * text produces an empty list.
     *
     * @param text the text to tokenize; may be {@code null}
     * @return a new, modifiable list of the non-empty tokens in the order they
     *         appear in {@code text}; empty (never {@code null}) when
     *         {@code text} is {@code null} or contains no word characters
     */
    public List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<String>();
        if (text == null) {
            return tokens;
        }
        for (String piece : DELIMITERS.split(text)) {
            // split() yields a leading empty string when the text begins with
            // a delimiter (and a single empty string for empty text); an empty
            // string is not a word, so skip it.
            if (piece.length() > 0) {
                tokens.add(piece);
            }
        }
        return tokens;
    }
}