/**
*
*/
package com.maalaang.omtwitter.text;
/**
* @author Sangwon Park
*
*/
public class InfoboxValueTokenizer {
public static final String REGEX_WORD_SPLIT = "[\\s\\p{Punct}&&[^\\-]]+";
public static final String REGEX_VALUE_SPLIT = "[,;]+";
public static String[] tokenizeToWord(String s) {
return s.split(REGEX_WORD_SPLIT);
}
public static String[] tokenizeToValues(String s) {
return s.replaceAll("\\s+", " ").split(REGEX_VALUE_SPLIT);
}
}