package semanticMarkup.ling.learn.utility;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import semanticMarkup.ling.learn.knowledge.Constant;
/**
 * Static helpers for string clean-up and regular-expression matching used by
 * the learning code. All methods are stateless.
 */
public class StringUtility {

    /** A tag with no space inside, e.g. {@code <b>} or {@code </p>}. */
    private static final Pattern TAG = Pattern.compile("<[^ >]*>");

    /** A processing instruction of the form {@code <? ... ?>}. */
    private static final Pattern PROCESSING_INSTRUCTION = Pattern.compile("<\\?[^>]*\\?>");

    /** An entity-like token: {@code &}, two to five non-space characters, {@code ;}. */
    private static final Pattern ENTITY = Pattern.compile("&[^ ]{2,5};");

    /** One or more whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** A single ASCII punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("[\\p{Punct}]");

    /**
     * Kept public so that existing code which instantiates this class keeps
     * compiling; every method of the class is static.
     */
    public StringUtility() {
    }

    /**
     * Replaces markup in the text with single spaces: tags that contain no
     * space, processing instructions ({@code <? ... ?>}) and short entities
     * ({@code &xx;}). Afterwards every run of whitespace is collapsed into one
     * space.
     *
     * @param text
     *            the text to clean
     * @return the text without markup
     */
    public static String strip(String text) {
        // "<[^ >]*>" accepts exactly the same strings as the former
        // "<(([^ >]|\n)*)>" (a newline is already covered by [^ >]) but does
        // not repeat a group with an alternation inside.
        String stripped = TAG.matcher(text).replaceAll(" ");
        stripped = PROCESSING_INSTRUCTION.matcher(stripped).replaceAll(" ");
        stripped = ENTITY.matcher(stripped).replaceAll(" ");
        return WHITESPACE_RUN.matcher(stripped).replaceAll(" ");
    }

    /**
     * Removes all punctuation characters from the text, optionally keeping one
     * punctuation string.
     *
     * @param text
     *            string from which punctuation is removed
     * @param c
     *            punctuation to keep, taken literally (not as a regular
     *            expression); {@code null} or empty keeps nothing
     * @return the text without punctuation, except for occurrences of
     *         {@code c}
     */
    public static String removePunctuation(String text, String c) {
        if (c == null || c.isEmpty()) {
            return PUNCTUATION.matcher(text).replaceAll("");
        }

        // Single pass: at every position the literal to keep is tried first,
        // then any punctuation character. Only matches of the literal
        // (group 1) are copied to the output.
        Matcher m = Pattern.compile("(" + Pattern.quote(c) + ")|\\p{Punct}").matcher(text);
        StringBuffer kept = new StringBuffer();
        while (m.find()) {
            String literal = m.group(1);
            m.appendReplacement(kept, literal == null ? "" : Matcher.quoteReplacement(literal));
        }
        m.appendTail(kept);
        return kept.toString();
    }

    /**
     * Removes leading and trailing whitespace.
     *
     * @param text
     *            the text to trim
     * @return the trimmed text
     */
    public static String trimString(String text) {
        return text.replaceAll("^\\s+|\\s+$", "");
    }

    /**
     * Helper of method updateTable: removes tags of the form {@code <...>}
     * (without whitespace inside) from the word, then trailing and leading
     * whitespace.
     *
     * @param word
     *            the word to process
     * @return the processed word
     */
    public static String processWord(String word) {
        // Ported from Perl:
        //   $word =~ s#<\S+?>##g;  (remove tag from the word)
        //   $word =~ s#\s+$##;
        //   $word =~ s#^\s*##;
        String processed = word.replaceAll("<\\S+?>", "");
        processed = processed.replaceAll("\\s+$", "");
        return processed.replaceAll("^\\s*", "");
    }

    /**
     * Removes every match of the regular expression from the word.
     *
     * @param word
     *            the input string
     * @param regex
     *            the regular expression whose matches are removed
     * @return the string without the matches
     */
    public static String removeAll(String word, String regex) {
        return word.replaceAll(regex, "");
    }

    /**
     * Removes every match of the regular expression repeatedly, until another
     * pass no longer changes the text.
     *
     * @param text
     *            the input string
     * @param regex
     *            the regular expression whose matches are removed
     * @return the string after the last pass
     */
    public static String removeAllRecursive(String text, String regex) {
        String previous = text;
        String current = previous.replaceAll(regex, "");
        while (!current.equals(previous)) {
            previous = current;
            current = previous.replaceAll(regex, "");
        }
        return current;
    }

    /**
     * Decides whether a token counts as a word. The token is rejected when it
     * is, as a whole, one of the alternatives in {@code Constant.STOP}, when
     * it contains anything but word characters, or when it is at most one
     * character long.
     *
     * @param token
     *            the token to check
     * @return true if the token passes all three checks
     */
    // Perl original:
    // if($t !~ /\b(?:$STOP)\b/ && $t =~/\w/ && $t !~ /\d/ && length $t > 1){
    // NOTE(review): the Perl condition also rejects tokens containing a digit;
    // this port does not. Confirm whether that difference is intended.
    public static boolean isWord(String token) {
        Constant constant = new Constant();
        String stopRegex = "\\b(" + constant.STOP + ")\\b";

        if (token.matches(stopRegex)) {
            return false;
        }
        if (!token.matches("\\w+")) {
            return false;
        }
        return token.length() > 1;
    }

    /**
     * In Perl this escaped [] {} and () for MySQL regexp, not Perl regexp. It
     * may not be necessary in Java, so the input is returned unchanged.
     *
     * @param singularPluralVariations
     *            the string to escape
     * @return the same string
     */
    public static String escape(String singularPluralVariations) {
        return singularPluralVariations;
    }

    /**
     * Checks whether the given text contains, delimited by word boundaries,
     * one of the words in the word list.
     *
     * @param word
     *            the text to check
     * @param wordList
     *            the words to match, inserted verbatim into a regular
     *            expression as an alternation (e.g. {@code "a|b|c"})
     * @return true if one of the words occurs in the text, false otherwise
     */
    public static boolean isMatchedWords(String word, String wordList) {
        return word.matches("^.*\\b(?:" + wordList + ")\\b.*$");
    }

    /**
     * Given a list of words in one string, separated by {@code |}, removes the
     * word from the list if it is in the list. Separators left dangling by the
     * removal (leading, doubled or trailing {@code |}) are cleaned up.
     *
     * @param word
     *            the word to remove, taken literally; {@code null} removes
     *            nothing
     * @param wordList
     *            the list to remove the word from
     * @return the list after removal of the word
     */
    public static String removeFromWordList(String word, String wordList) {
        String remaining = wordList;
        if (word != null) {
            // Quote the word so that regex metacharacters in it cannot change
            // what is removed or make the pattern invalid.
            remaining = remaining.replaceAll("\\b" + Pattern.quote(word) + "\\b", "");
        }
        remaining = remaining.replaceAll("^\\|", "");
        remaining = remaining.replaceAll("\\|\\|", "|");
        remaining = remaining.replaceAll("\\|$", "");
        return remaining;
    }

    /**
     * Converts a string array to a string of words separated by single spaces.
     *
     * @param words
     *            the words to concatenate
     * @return the concatenated string; empty if the array is empty
     */
    public static String stringArray2String(String[] words) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < words.length; i++) {
            if (i > 0) {
                joined.append(' ');
            }
            joined.append(words[i]);
        }
        return joined.toString();
    }

    /**
     * Gets a splice of the string list between the start index (inclusive)
     * and the end index (exclusive). The result is an independent copy.
     *
     * @param words
     *            the string list
     * @param start
     *            the start index of the section
     * @param end
     *            the end index of the section
     * @return the splice
     */
    public static List<String> stringArraySplice(List<String> words, int start, int end) {
        return new ArrayList<String>(words.subList(start, end));
    }

    /**
     * Joins a list of strings, placing the separator between neighbouring
     * elements.
     *
     * @param separater
     *            the separator
     * @param list
     *            the strings to join
     * @return the joined string; empty if the list is empty
     */
    public static String joinList(String separater, List<String> list) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String item : list) {
            if (!first) {
                joined.append(separater);
            }
            joined.append(item);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Given a regex and an input, returns a matcher to match the regex to the
     * input.
     *
     * @param input
     *            the input char sequence
     * @param regex
     *            the regular expression
     * @return the matcher
     */
    public static Matcher createMatcher(CharSequence input, String regex) {
        return Pattern.compile(regex).matcher(input);
    }

    /**
     * Null-safe method to find the pattern anywhere in a text.
     *
     * @param text
     *            the text to search
     * @param pattern
     *            the pattern to look for
     * @return true if the pattern is found in the text; false otherwise or if
     *         either argument is null
     */
    public static boolean isMatchedNullSafe(String text, String pattern) {
        if (pattern == null || text == null) {
            return false;
        }
        return Pattern.compile(pattern).matcher(text).find();
    }

    /**
     * Null-safe method to match an entire text against the pattern.
     *
     * @param text
     *            the text to match
     * @param pattern
     *            the pattern to match against
     * @return true if the whole text matches; false otherwise or if either
     *         argument is null
     */
    public static boolean isEntireMatchedNullSafe(String text, String pattern) {
        if (pattern == null || text == null) {
            return false;
        }
        return Pattern.compile(pattern).matcher(text).matches();
    }

    /**
     * Replaces every match of the regex in the text with the replacement. The
     * replacement may refer to captured groups, e.g. {@code $1}.
     *
     * @param text
     *            the input text
     * @param regex
     *            the regular expression
     * @param replacement
     *            the replacement string
     * @return the text after replacement
     */
    public static String replaceAllBackreference(String text, String regex, String replacement) {
        return createMatcher(text, regex).replaceAll(replacement);
    }

    /**
     * Computes the set difference {@code a - b}.
     *
     * @param a
     *            the set to subtract from
     * @param b
     *            the set of elements to leave out
     * @return a new set holding the elements of {@code a} that are not in
     *         {@code b}; if either argument is null, {@code a} itself
     */
    public static Set<String> setSubtraction(Set<String> a, Set<String> b) {
        if (a == null || b == null) {
            return a;
        }

        Set<String> difference = new HashSet<String>();
        for (String element : a) {
            if (!b.contains(element)) {
                difference.add(element);
            }
        }
        return difference;
    }

    /**
     * Removes the last char of a string.
     *
     * @param str
     *            the input string
     * @return the string without its last char; a null or empty input is
     *         returned unchanged
     */
    public static String chop(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return str.substring(0, str.length() - 1);
    }

    /**
     * Escapes special characters of Perl regular expressions by putting a
     * backslash in front of each of {@code ( ) [ ] { } . | - + ? *}.
     *
     * @param str
     *            the string to escape
     * @return the escaped string, or null if the input is null
     */
    public static String escapePerlRegex(String str) {
        if (str != null) {
            str = str.replaceAll("([\\(\\)\\[\\]\\{\\}\\.\\|\\-\\+\\?\\*])",
                    "\\\\$1");
        }
        return str;
    }
}