package com.felix.util;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Methods around string processing.
*
* @author felix
*
*/
public class StringUtil {
/**
 * Split a string at whitespace and collect the pieces.
 *
 * @param s
 *            The text to split; must not be null.
 * @return The tokens in their order of appearance; an empty vector if
 *         the text holds no token.
 */
public static Vector<String> stringToVector(String s) {
    final StringTokenizer tokenizer = new StringTokenizer(s);
    final Vector<String> tokens = new Vector<String>();
    for (int remaining = tokenizer.countTokens(); remaining > 0; remaining--) {
        tokens.add(tokenizer.nextToken());
    }
    return tokens;
}
/**
 * Look up the first match of a regular expression and return one of its
 * capturing groups.
 *
 * @param s
 *            The content string, e.g. "bla fio boo.jpg bjla"
 * @param pattern
 *            The regular expression, e.g. whitespace, three characters, a
 *            dot, three characters, whitespace, with the middle part in
 *            group 1
 * @param i
 *            The group number, e.g. 1
 * @return The group of the first match, e.g. "boo.jpg", or null if the
 *         pattern does not occur in the string.
 */
public static String getFirstGroup(String s, String pattern, int i) {
    final Matcher matcher = Pattern.compile(pattern).matcher(s);
    return matcher.find() ? matcher.group(i) : null;
}
/**
 * Replace the textual six-character Unicode escape sequences of the
 * German umlauts and the sharp s (a backslash followed by "u00fc",
 * "u00f6", "u00e4", "u00dc", "u00d6", "u00c4" or "u00df") by the
 * characters they denote.
 *
 * @param in
 *            The input; must not be null.
 * @return The input with the seven known sequences replaced.
 */
public static String uniCodeToUmlaut(String in) {
    // Each row: { escaped form as it appears in the text, real character }.
    final String[][] replacements = {
            { "\\u00fc", "ü" },
            { "\\u00f6", "ö" },
            { "\\u00e4", "ä" },
            { "\\u00dc", "Ü" },
            { "\\u00d6", "Ö" },
            { "\\u00c4", "Ä" },
            { "\\u00df", "ß" } };
    String converted = in;
    for (String[] pair : replacements) {
        converted = converted.replace(pair[0], pair[1]);
    }
    return converted;
}
/**
 * Delete every character that is neither an ASCII letter, an ASCII digit
 * nor a blank.
 *
 * @param in
 *            The input, e.g. "Alpha, Romeo 12!"; must not be null.
 * @return The output, e.g. "Alpha Romeo 12"
 */
public static String removeNonAlpanumericCharacters(String in) {
    final Matcher unwanted = Pattern.compile("[^A-Za-z0-9 ]").matcher(in);
    return unwanted.replaceAll("");
}
/**
 * Check whether at least one whitespace separated token of a string
 * consists of alphanumeric characters only. A token such as "foo," does
 * not count because of the trailing comma.
 *
 * @param in
 *            The string to check; must not be null.
 * @return True if such a token exists.
 */
public static boolean checkIfWordsContained(String in) {
    final Pattern word = Pattern.compile("\\p{Alnum}+");
    final StringTokenizer tokens = new StringTokenizer(in);
    boolean found = false;
    while (!found && tokens.hasMoreTokens()) {
        found = word.matcher(tokens.nextToken()).matches();
    }
    return found;
}
/**
 * Compare how often an opening and a closing bracket occur in a string.
 * Only the two counts are compared; the order of the brackets is not
 * inspected.
 *
 * @param test
 *            The string to inspect.
 * @param oBracket
 *            The opening bracket.
 * @param cBracket
 *            The closing bracket.
 * @return True if both brackets occur equally often.
 */
public static boolean isBracketClosed(String test, String oBracket,
        String cBracket) {
    return countTokens(test, oBracket) == countTokens(test, cBracket);
}
/**
 * Copy a String array while leaving out the element at one index.
 *
 * @param array
 *            The input, e.g. a b c d
 * @param index
 *            The index of the element to leave out, e.g. 3
 * @return A new array that is one element shorter, e.g. a b c
 */
public static String[] getAllButOne(String[] array, int index) {
    final String[] remaining = new String[array.length - 1];
    int target = 0;
    for (int source = 0; source < array.length; source++) {
        if (source == index) {
            continue;
        }
        remaining[target++] = array[source];
    }
    return remaining;
}
/**
 * Check whether a string is made up of digits only.
 *
 * @param str
 *            The string to check; may be null.
 * @return True if the string has at least one character and every
 *         character is a digit according to Character.isDigit; false for
 *         null and for the empty string.
 */
public static boolean containsOnlyNumbers(String str) {
    if (str == null) {
        return false;
    }
    final char[] chars = str.toCharArray();
    if (chars.length == 0) {
        return false;
    }
    for (char c : chars) {
        if (!Character.isDigit(c)) {
            return false;
        }
    }
    return true;
}
/**
 * Extract a picture URL from some content. Only URLs starting with
 * "http://" and ending in .jpg, .JPG, .png, .PNG, .gif or .GIF are
 * recognized. The extensions are tried in that order and the first one
 * that yields a match wins.
 *
 * @param contents
 *            The contents, e.g.
 *            "blablahttp://test.com/index/bla.jpgquatschnochmal "
 *
 * @return The enclosed picture URL, e.g. http://test.com/index/bla.jpg,
 *         or "" if not found.
 */
public static String extracPictureURL(String contents) {
    final String[] extensions = { "\\.jpg", "\\.JPG", "\\.png", "\\.PNG",
            "\\.gif", "\\.GIF" };
    for (String ext : extensions) {
        final Matcher m = Pattern.compile("http://([^\\:]+?)" + ext)
                .matcher(contents);
        // Return only on a hit; otherwise go on with the next extension.
        if (m.find()) {
            return m.group();
        }
    }
    return "";
}
/**
 * Split a string at a set of delimiter characters and collect the
 * pieces.
 *
 * @param s
 *            The String; must not be null.
 * @param sep
 *            The delimiters; every character of this string acts as a
 *            separator.
 * @return The tokens in their order of appearance.
 */
public static Vector<String> stringToVector(String s, String sep) {
    final StringTokenizer tokenizer = new StringTokenizer(s, sep);
    final Vector<String> tokens = new Vector<String>();
    for (int remaining = tokenizer.countTokens(); remaining > 0; remaining--) {
        tokens.add(tokenizer.nextToken());
    }
    return tokens;
}
/**
 * Copy the elements of a string vector into a new array.
 *
 * @param v
 *            The vector; must not be null.
 * @return An array holding the vector's elements in the same order.
 */
public static String[] vectorToStringArray(Vector<String> v) {
    final String[] copy = new String[v.size()];
    return v.toArray(copy);
}
/**
 * Concatenate all values of a string vector, each followed by one blank.
 * The result therefore ends with a blank unless the vector is empty.
 *
 * @param v
 *            The input vector; must not be null.
 * @return The output String, e.g. "a b " for the elements a and b.
 */
public static String vectorToString(Vector<String> v) {
    final StringBuilder joined = new StringBuilder();
    for (int i = 0; i < v.size(); i++) {
        joined.append(v.get(i)).append(' ');
    }
    return joined.toString();
}
/**
 * Get all blank separated words of a string except the first one.
 *
 * @param s
 *            The input, e.g. "a b"
 * @return The result, e.g. "b"
 */
public static String getAllButFirst(String s) {
    final String[] words = stringToArray(s, " ");
    final String[] tail = getAllButFirst(words);
    return Util.arrayToString(tail);
}
/**
 * Get all blank separated words of a string except the last one.
 *
 * @param s
 *            The input, e.g. "a b"
 * @return The result, e.g. "a"
 */
public static String getAllButLast(String s) {
    final String[] words = stringToArray(s, " ");
    final String[] head = getAllButLast(words);
    return Util.arrayToString(head);
}
/**
 * Count the whitespace separated tokens of a String.
 *
 * @param t
 *            The input, e.g. "foo bar"; must not be null.
 * @return The number of tokens, e.g. 2
 */
public static int numWords(String t) {
    final StringTokenizer words = new StringTokenizer(t);
    return words.countTokens();
}
/**
 * Copy a string array without its first element.
 *
 * @param a
 *            The input, e.g. {"a", "b"}
 * @return The result, e.g. {"b"}, or null if the input is null or empty.
 */
public static String[] getAllButFirst(String[] a) {
    final boolean nothingToCopy = a == null || a.length == 0;
    if (nothingToCopy) {
        return null;
    }
    final int count = a.length - 1;
    final String[] tail = new String[count];
    System.arraycopy(a, 1, tail, 0, count);
    return tail;
}
/**
 * Copy a string array without its last element.
 *
 * @param a
 *            The input, e.g. {"a", "b"}
 * @return The result, e.g. {"a"}, or null if the input is null or empty.
 */
public static String[] getAllButLast(String[] a) {
    final boolean nothingToCopy = a == null || a.length == 0;
    if (nothingToCopy) {
        return null;
    }
    final int count = a.length - 1;
    final String[] head = new String[count];
    System.arraycopy(a, 0, head, 0, count);
    return head;
}
/**
 * Search a vector for an element that equals a string; both sides are
 * trimmed before they are compared.
 *
 * @param s
 *            The string to look for; may be null.
 * @param v
 *            The vector to search; may be null, its elements must not
 *            be null.
 * @return True if an equal element exists; false otherwise, also if
 *         either argument is null.
 */
public static boolean isStringInVector(String s, Vector<String> v) {
    if (v == null || s == null) {
        return false;
    }
    final String wanted = s.trim();
    for (String element : v) {
        if (wanted.equals(element.trim())) {
            return true;
        }
    }
    return false;
}
/**
 * Consume the remaining tokens of a tokenizer and join them with single
 * blanks.
 *
 * @param st
 *            The Tokenizer; it is exhausted afterwards.
 *
 * @return The joined tokens, trimmed; "" if no token was left.
 */
public static String getRestOfLine(StringTokenizer st) {
    final StringBuilder rest = new StringBuilder();
    for (; st.hasMoreTokens();) {
        rest.append(st.nextToken()).append(' ');
    }
    return rest.toString().trim();
}
/**
 * Search a vector for an element that equals a string when case is
 * ignored. Nothing is trimmed.
 *
 * @param s
 *            The string to look for; may be null.
 * @param v
 *            The vector to search; may be null.
 * @return True if such an element exists; false otherwise, also if
 *         either argument is null.
 */
public static boolean isStringInVectorIgnoreCase(String s, Vector<String> v) {
    if (v == null || s == null) {
        return false;
    }
    for (String element : v) {
        if (s.equalsIgnoreCase(element)) {
            return true;
        }
    }
    return false;
}
/**
 * Find the position of the first vector element that equals a string
 * when case is ignored.
 *
 * @param s
 *            The string; may be null.
 * @param v
 *            The vector; may be null.
 * @return The index, or -1 if not included or if either argument is
 *         null.
 */
public static int indexOfStringInVectorIgnoreCase(String s, Vector<String> v) {
    if (v == null || s == null) {
        return -1;
    }
    for (int pos = 0; pos < v.size(); pos++) {
        if (s.equalsIgnoreCase(v.get(pos))) {
            return pos;
        }
    }
    return -1;
}
/**
 * Test whether a string is an element of an array; both sides are
 * trimmed before they are compared.
 *
 * @param s
 *            The string to look for; may be null.
 * @param sa
 *            The array to search; may be null, its elements must not be
 *            null.
 * @return True if an equal element exists; false otherwise, also if
 *         either argument is null.
 */
public static boolean isStringInArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    final String wanted = s.trim();
    for (String candidate : sa) {
        if (wanted.equals(candidate.trim())) {
            return true;
        }
    }
    return false;
}
/**
 * Find the position of the first array element that equals a string;
 * both sides are trimmed before they are compared.
 *
 * @param s
 *            The string; may be null.
 * @param sa
 *            The array; may be null, its elements must not be null.
 * @return The index, or -1 if not included or if either argument is
 *         null.
 */
public static int indexOfStringInArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return -1;
    }
    final String wanted = s.trim();
    int pos = 0;
    for (String candidate : sa) {
        if (wanted.equals(candidate.trim())) {
            return pos;
        }
        pos++;
    }
    return -1;
}
/**
 * Return the number of positions between the first occurrence of one of
 * two strings in an array and the next occurrence of the other one.
 * Elements and search strings are trimmed before they are compared.
 *
 * @param a
 *            The one string, e.g. "foo"
 * @param b
 *            The other String, e.g. "bar"
 * @param sa
 *            The array, e.g. "bla bar blubb blubb foo bar"
 * @return The distance, e.g. 3, or Integer.MAX_VALUE if the two strings
 *         were not both found or if an argument is null.
 */
public static int stringDistanceInArray(String a, String b, String[] sa) {
    if (sa == null || a == null || b == null) {
        return Integer.MAX_VALUE;
    }
    final String keyA = a.trim();
    final String keyB = b.trim();
    boolean seenA = false;
    boolean seenB = false;
    int steps = 0;
    for (String element : sa) {
        final String current = element.trim();
        if (keyA.equals(current)) {
            if (seenB) {
                return steps;
            }
            seenA = true;
        }
        if (keyB.equals(current)) {
            if (seenA) {
                return steps;
            }
            seenB = true;
        }
        // Counting starts with the element at which the first key was seen.
        if (seenA || seenB) {
            steps++;
        }
    }
    return Integer.MAX_VALUE;
}
/**
 * Return the number of positions between the first occurrence of one of
 * two strings in a vector and the next occurrence of the other one.
 * Elements and search strings are trimmed before they are compared.
 *
 * @param a
 *            The one string, e.g. "foo"
 * @param b
 *            The other String, e.g. "bar"
 * @param sa
 *            The vector, e.g. "bla bar blubb blubb foo bar"
 * @return The distance, e.g. 3, or Integer.MAX_VALUE if the two strings
 *         were not both found or if an argument is null.
 */
public static int stringDistanceInVector(String a, String b,
        Vector<String> sa) {
    if (sa == null || a == null || b == null) {
        return Integer.MAX_VALUE;
    }
    final String keyA = a.trim();
    final String keyB = b.trim();
    boolean seenA = false;
    boolean seenB = false;
    int steps = 0;
    for (int pos = 0; pos < sa.size(); pos++) {
        final String current = sa.get(pos).trim();
        if (keyA.equals(current)) {
            if (seenB) {
                return steps;
            }
            seenA = true;
        }
        if (keyB.equals(current)) {
            if (seenA) {
                return steps;
            }
            seenB = true;
        }
        // Counting starts with the element at which the first key was seen.
        if (seenA || seenB) {
            steps++;
        }
    }
    return Integer.MAX_VALUE;
}
/**
 * Return true if a string ends with one of the strings in an array. The
 * array entries are taken literally, not as patterns.
 *
 * @param s
 *            The string, e.g. "foobla"; may be null.
 * @param sa
 *            The array, e.g. "b", "b.a", "bla"; may be null.
 * @return In this case, true. False if no entry is a suffix or if either
 *         argument is null.
 */
public static boolean isStringEndWithArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    for (String suffix : sa) {
        if (s.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
/**
 * Return true if one of the whitespace separated words of a string is a
 * keyword from an array, e.g. a keyword of {"banana", "apple"} occurs in
 * "i eat apple".
 *
 * @param s
 *            The String, e.g. "i eat apple"; may be null.
 * @param sa
 *            The array, e.g. {"banana", "apple"}; may be null.
 * @return The result, e.g. true. False if either argument is null.
 */
public static boolean isKeywordArrayInString(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    final StringTokenizer words = new StringTokenizer(s);
    boolean hit = false;
    while (!hit && words.hasMoreTokens()) {
        hit = isStringInArray(words.nextToken(), sa);
    }
    return hit;
}
/**
 * Test whether a string starts with one of the strings from an array.
 *
 * @param s
 *            The string, e.g. "// blab bla"; may be null.
 * @param sa
 *            The array, e.g. "#", "//"; may be null.
 * @return The result, e.g. true. False if no entry is a prefix or if
 *         either argument is null.
 */
public static boolean startsStringWithArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    for (int i = 0; i < sa.length; i++) {
        if (s.startsWith(sa[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Test whether a string contains one of the strings from an array.
 *
 * @param s
 *            The string, e.g. "blab bla // bla"; may be null.
 * @param sa
 *            The array, e.g. "//" "#"; may be null.
 * @return The result, e.g. true. False if no entry is contained or if
 *         either argument is null.
 */
public static boolean containsStringWithArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    for (int i = 0; i < sa.length; i++) {
        if (s.contains(sa[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Test whether a string ends with one of the strings from an array.
 *
 * @param s
 *            The string, e.g. "blab bla //"; may be null.
 * @param sa
 *            The array, e.g. "//" "#"; may be null.
 * @return The result, e.g. true. False if no entry is a suffix or if
 *         either argument is null.
 */
public static boolean endsStringWithArray(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    for (int i = 0; i < sa.length; i++) {
        if (s.endsWith(sa[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Return true if one of the whitespace separated words of a string is,
 * ignoring case, a keyword from an array, e.g. a keyword of {"banana",
 * "apple"} occurs in "i eat Apple".
 *
 * @param s
 *            The String, e.g. "i eat Apple"; may be null.
 * @param sa
 *            The array, e.g. {"banana", "apple"}; may be null.
 * @return The result, e.g. true. False if either argument is null.
 */
public static boolean isKeywordArrayInStringIgnoreCase(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    final StringTokenizer words = new StringTokenizer(s);
    boolean hit = false;
    while (!hit && words.hasMoreTokens()) {
        hit = isStringInArrayIgnoreCase(words.nextToken(), sa);
    }
    return hit;
}
/**
 * Test whether a string is, ignoring case, an element of an array.
 * Nothing is trimmed.
 *
 * @param s
 *            The string to look for; may be null.
 * @param sa
 *            The array to search; may be null.
 * @return True if such an element exists; false otherwise, also if
 *         either argument is null.
 */
public static boolean isStringInArrayIgnoreCase(String s, String[] sa) {
    if (sa == null || s == null) {
        return false;
    }
    for (String candidate : sa) {
        if (s.equalsIgnoreCase(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Remove every tag, i.e. every sequence from "&lt;" up to the next
 * "&gt;", from a string.
 *
 * @param in
 *            The string; must not be null.
 * @return The string without tags.
 */
public static String stripTags(String in) {
    final Matcher tags = Pattern.compile("<[^>]*>").matcher(in);
    return tags.replaceAll("");
}
/**
 * Remove all line feed characters from a string. Carriage returns are
 * left in place.
 *
 * @param in
 *            The string; must not be null.
 * @return The string without line feeds.
 */
public static String stripNewlines(String in) {
    return in.replace("\n", "");
}
/**
 * Normalize the whitespace of a string: the whitespace separated tokens
 * are joined again with exactly one blank between them.
 *
 * @param in
 *            The String; must not be null.
 * @return The normalized String, e.g. "a b" for " a \t b\n".
 */
public static String stripWhiteSpce(String in) {
    final StringBuilder normalized = new StringBuilder();
    for (StringTokenizer words = new StringTokenizer(in); words.hasMoreTokens();) {
        normalized.append(words.nextToken()).append(' ');
    }
    return normalized.toString().trim();
}
/**
 * Count the line feed characters in a string.
 *
 * @param in
 *            The String; must not be null.
 * @return The number of line feeds.
 */
public static int countNewlines(String in) {
    int count = 0;
    int pos = in.indexOf('\n');
    while (pos >= 0) {
        count++;
        pos = in.indexOf('\n', pos + 1);
    }
    return count;
}
/**
 * Remove all tags (xml-elements) inside one section of a string and
 * leave the text before and after that section untouched.
 *
 * @param string
 *            The String.
 * @param start
 *            The start position of the section (inclusive).
 * @param end
 *            The end position of the section (exclusive).
 * @return The string with the section cleaned by stripTags.
 */
public static String stripTagsAtPosition(String string, int start, int end) {
    final String cleaned = stripTags(string.substring(start, end));
    final StringBuilder result = new StringBuilder(string.substring(0, start));
    result.append(cleaned).append(string.substring(end));
    return result.toString();
}
/**
 * Remove the tags from the part of a string that starts at the last
 * "&lt;". A string without "&lt;", or whose last "&lt;" is its very
 * first character, is returned unchanged.
 * NOTE(review): the check is "index &lt; 1", so a string consisting of a
 * single tag is not stripped; confirm whether "index &lt; 0" was
 * intended.
 *
 * @param string
 *            The string, e.g. &lt;bla&gt;foo&lt;/bla&gt;
 * @return The result, e.g. "&lt;bla&gt;foo"
 */
public static String stripLastTag(String string) {
    final int tagStart = string.lastIndexOf("<");
    if (tagStart < 1) {
        return string;
    }
    final String tail = stripTags(string.substring(tagStart));
    final String head = string.substring(0, tagStart);
    return head + tail;
}
/**
 * Surround a string by an opening and a closing xml-tag.
 *
 * @param string
 *            The string, e.g. foo
 * @param tag
 *            The tag name, e.g. bar
 * @return The return, e.g. &lt;bar&gt;foo&lt;/bar&gt;
 */
public static String tagString(String string, String tag) {
    final StringBuilder tagged = new StringBuilder();
    tagged.append('<').append(tag).append('>');
    tagged.append(string);
    tagged.append("</").append(tag).append('>');
    return tagged.toString();
}
/**
 * Surround a section of a string by xml-tags. The first whitespace
 * separated token of the tag argument is the tag name; any further
 * tokens are treated as attributes, joined by single blanks and put into
 * the opening tag only.
 *
 * @param string
 *            The string, e.g. blafoobla
 * @param tag
 *            The tag, e.g. bar color="es a"; must contain at least one
 *            token.
 * @param start
 *            The beginning of the section (inclusive), e.g. 3
 * @param end
 *            The end of the section (exclusive), e.g. 6
 *
 * @return The return, e.g. bla&lt;bar color="es a"&gt;foo&lt;/bar&gt;bla
 */
public static String tagStringAtPosition(String string, String tag,
        int start, int end) {
    final StringTokenizer parts = new StringTokenizer(tag);
    final String tagname = parts.nextToken();
    final StringBuilder attributes = new StringBuilder();
    while (parts.hasMoreTokens()) {
        attributes.append(parts.nextToken()).append(' ');
    }
    final StringBuilder result = new StringBuilder();
    result.append(string.substring(0, start)).append('<').append(tagname);
    if (attributes.length() > 0) {
        result.append(' ').append(attributes.toString().trim());
    }
    result.append('>').append(string.substring(start, end));
    result.append("</").append(tagname).append('>');
    result.append(string.substring(end));
    return result.toString();
}
/**
 * Search backwards for a key string inside the beginning of another
 * string. Only the first endSearch - 1 characters of the string are
 * searched.
 *
 * @param string
 *            The String to search.
 * @param key
 *            The keyString.
 * @param endSearch
 *            The search limit; values of 0 or less yield 0.
 * @return The index of the last occurrence plus one if that index is
 *         greater than 0; 0 if the key was not found or was found at
 *         index 0.
 */
public static int lastPositionOf(String string, String key, int endSearch) {
    if (endSearch <= 0) {
        return 0;
    }
    // A single backward search replaces the repeated forward searches.
    final int last = string.substring(0, endSearch - 1).lastIndexOf(key);
    return last > 0 ? last + 1 : 0;
}
/**
 * Test if a String is not null, contains characters besides whitespace
 * and is not the literal text "null".
 *
 * @param s
 *            The String.
 * @return True or False.
 */
public static boolean isFilled(String s) {
    if (s == null) {
        return false;
    }
    if (s.trim().length() == 0) {
        return false;
    }
    return !"null".equals(s);
}
/**
 * Test if a String is null, contains no characters besides whitespace or
 * is the literal text "null".
 *
 * @param s
 *            The String.
 * @return True or False.
 */
public static boolean isEmpty(String s) {
    if (s == null) {
        return true;
    }
    final boolean blank = s.trim().length() == 0;
    return blank || "null".equals(s);
}
/**
 * Return the whitespace separated tokens of a string.
 *
 * @param s
 *            The string; must not be null.
 * @return The tokens in their order of appearance; an empty array if the
 *         string holds no token.
 */
public static String[] stringToArray(String s) {
    final StringTokenizer tokenizer = new StringTokenizer(s);
    final String[] tokens = new String[tokenizer.countTokens()];
    for (int slot = 0; tokenizer.hasMoreTokens(); slot++) {
        tokens[slot] = tokenizer.nextToken();
    }
    return tokens;
}
/**
 * Capitalize one word: the first character is converted to upper case,
 * the rest is left as it is.
 *
 * @param inputWord
 *            The input, e.g. "aBa"; may be null or empty.
 * @return The output, e.g "ABa". A null or empty input is returned
 *         unchanged.
 */
public static String capitalizeOneWord(String inputWord) {
    // Without this guard substring(0, 1) fails on an empty string.
    if (inputWord == null || inputWord.length() == 0) {
        return inputWord;
    }
    return inputWord.substring(0, 1).toUpperCase() + inputWord.substring(1);
}
/**
 * Change the capitalization of an input word: a word starting with an
 * ASCII upper case letter gets a lower case first character, any other
 * word gets an upper case first character. The rest of the word is left
 * as it is.
 *
 * @param inputWord
 *            The input word, e.g. "foo" or "Bar"; may be null or empty.
 * @return The result, e.g. "Foo" or "bar". A null or empty input is
 *         returned unchanged.
 */
public static String switchCapitalization(String inputWord) {
    // Without this guard substring(0, 1) fails on an empty string.
    if (inputWord == null || inputWord.length() == 0) {
        return inputWord;
    }
    final char first = inputWord.charAt(0);
    final String head = inputWord.substring(0, 1);
    final String remainder = inputWord.substring(1);
    // Checking the character directly also works for words that contain
    // line terminators, which a "[A-Z].*" match over the whole word rejects.
    if (first >= 'A' && first <= 'Z') {
        return head.toLowerCase() + remainder;
    }
    return head.toUpperCase() + remainder;
}
/**
 * Join the elements of a string array with single blanks and trim the
 * result.
 *
 * @param array
 *            The array, e.g. "the", "array"; must not be null.
 * @return E.g. "the array"
 */
public static String arrayToString(String[] array) {
    final StringBuilder joined = new StringBuilder();
    for (int i = 0; i < array.length; i++) {
        joined.append(array[i]).append(' ');
    }
    return joined.toString().trim();
}
/**
 * Capitalize a number of words. The input is split at blanks, each word
 * is split again at hyphens, and every part is passed through
 * capitalizeOneWord. Hyphens at the edges of a word and repeated hyphens
 * are not reproduced; a word consisting of hyphens only is dropped.
 *
 * @param in
 *            The input, e.g. "aBa hJk-sdf"
 * @return The output, e.g. "ABa HJk-Sdf"
 */
public static String capitalizeMultipleWords(String in) {
    final StringBuilder out = new StringBuilder();
    final StringTokenizer words = new StringTokenizer(in, " ");
    while (words.hasMoreTokens()) {
        final StringTokenizer parts = new StringTokenizer(words.nextToken(), "-");
        // A word made of hyphens only has no parts; skipping it avoids
        // cutting a character off a result that may still be empty.
        if (!parts.hasMoreTokens()) {
            continue;
        }
        out.append(capitalizeOneWord(parts.nextToken()));
        while (parts.hasMoreTokens()) {
            out.append('-').append(capitalizeOneWord(parts.nextToken()));
        }
        out.append(' ');
    }
    return out.toString().trim();
}
/**
 * Capitalize multiple words if they don't appear in a list of stopwords.
 * The input is split at blanks, each word is split again at hyphens, and
 * every part that is not a stopword is passed through capitalizeOneWord.
 * Hyphens at the edges of a word and repeated hyphens are not
 * reproduced; a word consisting of hyphens only is dropped.
 *
 * @param in
 *            The words, e.g. "boo-fa fum"
 * @param stopwords
 *            The stopwords, e.g. "fum"
 * @return The result, e.g. "Boo-Fa fum"
 */
public static String capitalizeMultipleWords(String in, String[] stopwords) {
    final StringBuilder out = new StringBuilder();
    final StringTokenizer words = new StringTokenizer(in, " ");
    while (words.hasMoreTokens()) {
        final StringTokenizer parts = new StringTokenizer(words.nextToken(), "-");
        // A word made of hyphens only has no parts; skipping it avoids
        // cutting a character off a result that may still be empty.
        if (!parts.hasMoreTokens()) {
            continue;
        }
        boolean firstPart = true;
        while (parts.hasMoreTokens()) {
            final String part = parts.nextToken();
            if (!firstPart) {
                out.append('-');
            }
            out.append(isStringInArray(part, stopwords) ? part
                    : capitalizeOneWord(part));
            firstPart = false;
        }
        out.append(' ');
    }
    return out.toString().trim();
}
/**
 * Uppercase all abbreviations, i.e. whitespace separated words with a
 * length of at most 4. Longer words are passed through
 * capitalizeOneWord.
 *
 * @param in
 *            The input, e.g. "heads cia"
 * @return The output, e.g "Heads CIA"
 */
public static String abbreviationsToUpperCase(String in) {
    final StringBuilder out = new StringBuilder();
    for (StringTokenizer words = new StringTokenizer(in); words.hasMoreTokens();) {
        final String word = words.nextToken();
        out.append(word.length() > 4 ? capitalizeOneWord(word) : word.toUpperCase());
        out.append(' ');
    }
    return out.toString().trim();
}
/**
 * Capitalize a named entity: every whitespace separated word is passed
 * through capitalizeOneWord unless Util.isStringContainedInArray reports
 * it for the stopword array.
 *
 * @param in
 *            The input, e.g. "werner von siemens"
 * @param stopwords
 *            The stopwords, e.g. "de la von van für"
 * @return The output, e.g. "Werner von Siemens"
 */
public static String capitalizeNamedEntities(String in, String[] stopwords) {
    final StringBuilder out = new StringBuilder();
    for (StringTokenizer words = new StringTokenizer(in); words.hasMoreTokens();) {
        final String word = words.nextToken();
        final boolean keepAsIs = Util.isStringContainedInArray(word, stopwords);
        out.append(keepAsIs ? word : capitalizeOneWord(word)).append(' ');
    }
    return out.toString().trim();
}
/**
 * Return the tokens of a string that are separated by the given
 * delimiter characters.
 *
 * @param s
 *            The string; must not be null.
 * @param seperator
 *            The delimiters; every character of this string acts as a
 *            separator.
 * @return The tokens in their order of appearance, or null if the string
 *         holds no token.
 */
public static String[] stringToArray(String s, String seperator) {
    final StringTokenizer tokenizer = new StringTokenizer(s, seperator);
    final int total = tokenizer.countTokens();
    if (total == 0) {
        return null;
    }
    final String[] tokens = new String[total];
    for (int slot = 0; slot < total; slot++) {
        tokens[slot] = tokenizer.nextToken();
    }
    return tokens;
}
/**
 * Test if a word starts with an ASCII lower case letter.
 *
 * @param word
 *            The word, e.g. "foo"; must not be null.
 * @return The result, e.g. true
 */
public static boolean isLowerCase(String word) {
    return Pattern.matches("[a-z].*", word);
}
/**
 * Test if a word starts with an ASCII upper case letter.
 *
 * @param word
 *            The word, e.g. "Foo"; must not be null.
 * @return The result, e.g. true
 */
public static boolean isUpperCase(String word) {
    return Pattern.matches("[A-Z].*", word);
}
/**
 * Convert a String to an array of its tokens, leaving out a number of
 * leading tokens.
 *
 * @param s
 *            The string, e.g. "id,3,5,4"
 * @param seperator
 *            The separator characters, e.g. ","
 * @param offset
 *            The number of leading tokens to skip, e.g. 1
 * @return The array, e.g. "3", "5", "4"
 */
public static String[] stringToArray(String s, String seperator, int offset) {
    final StringTokenizer tokenizer = new StringTokenizer(s, seperator);
    final String[] result = new String[tokenizer.countTokens() - offset];
    // The allocation above succeeded, so at least offset tokens exist.
    for (int skipped = 0; skipped < offset; skipped++) {
        tokenizer.nextToken();
    }
    for (int slot = 0; tokenizer.hasMoreTokens(); slot++) {
        result[slot] = tokenizer.nextToken();
    }
    return result;
}
/**
 * Write a message followed by a line break to System.out.
 *
 * @param msg
 *            The text to print.
 */
public static void printOut(String msg) {
    final String line = msg;
    System.out.println(line);
}
/**
 * Write the elements of a string array, joined by arrayToString, as one
 * line to System.out.
 *
 * @param msg
 *            The array to print.
 */
public static void printOutArray(String[] msg) {
    final String line = arrayToString(msg);
    System.out.println(line);
}
/**
 * Return the text enclosed by the first occurrence of a front trigger
 * and the next occurrence of a rear trigger after it.
 *
 * @param target
 *            The containing string, e.g. "bla[[foo]]bar".
 * @param front
 *            The front trigger, e.g. "[[".
 * @param rear
 *            The rear trigger, e.g. "]]".
 * @return The contained String, e.g. "foo", or null if a trigger is
 *         missing, if nothing lies between the triggers, or if the
 *         content would start at index 0.
 */
public static String getStringBetween(String target, String front,
        String rear) {
    final int frontIndex = target.indexOf(front);
    if (frontIndex < 0) {
        return null;
    }
    final int contentStart = frontIndex + front.length();
    final int rearIndex = target.indexOf(rear, contentStart);
    final boolean usable = contentStart > 0 && rearIndex > contentStart;
    return usable ? target.substring(contentStart, rearIndex) : null;
}
/**
 * Get the string between two other strings, where the rear string is
 * chosen from an array of candidates: the candidate that occurs first
 * after the front string wins, which gives the shortest result.
 *
 * @param target
 *            The target, e.g. "asda bla blobli foo faa ; asd"
 * @param front
 *            The first string, e.g. "bla"
 * @param rears
 *            The back possibilities, e.g. {"foo", ";", "faa"}
 * @return e.g. " blobli ", or null if the front or every rear string is
 *         missing or if nothing lies between them.
 */
public static String getStringBetween(String target, String front,
        String[] rears) {
    final int frontIndex = target.indexOf(front);
    if (frontIndex < 0) {
        return null;
    }
    final int contentStart = frontIndex + front.length();
    final String tail = target.substring(contentStart);
    // Offsets are relative to contentStart; target.length() means "none".
    int nearest = target.length();
    for (String rear : rears) {
        final int candidate = tail.indexOf(rear);
        if (candidate >= 0 && candidate < nearest) {
            nearest = candidate;
        }
    }
    if (contentStart <= 0 || nearest <= 0 || nearest >= target.length()) {
        return null;
    }
    return target.substring(contentStart, contentStart + nearest);
}
/**
 * Return a string with the first occurrence of a part removed. The
 * characters around the part, including blanks, are kept.
 *
 * @param all
 *            The complete String, e.g. "foo bar boo"
 * @param sub
 *            The part, e.g. "bar"
 * @return The result, e.g. "foo  boo" (with both surrounding blanks), or
 *         the unchanged input if the part does not occur.
 */
public static String getWithoutSubstring(String all, String sub) {
    final int at = all.indexOf(sub);
    if (at == -1) {
        return all;
    }
    final StringBuilder shortened = new StringBuilder(all);
    shortened.delete(at, at + sub.length());
    return shortened.toString();
}
/**
 * Get the string between a front and a rear string, both chosen from
 * arrays of candidates. The content starts right after the front
 * candidate whose first occurrence ends earliest in the target, and it
 * ends before the rear candidate that occurs first after that position.
 *
 * @param target
 *            The target, e.g. "bla[[foo]]bar"
 * @param fronts
 *            The front possibilities, e.g. {"[[", "(("}
 * @param rears
 *            The rear possibilities, e.g. {"]]", "))"}
 * @return The enclosed string, e.g. "foo", or null if no front or no
 *         rear candidate is found, if nothing lies between them, or if
 *         the content would start at index 0.
 */
public static String getStringBetween(String target, String[] fronts,
        String[] rears) {
    final int length = target.length();
    // First settle the start of the content over ALL front candidates ...
    int contentStart = length;
    for (String f : fronts) {
        final int hit = target.indexOf(f);
        if (hit < 0) {
            continue;
        }
        final int afterFront = hit + f.length();
        if (afterFront < contentStart) {
            contentStart = afterFront;
        }
    }
    if (contentStart <= 0 || contentStart >= length) {
        return null;
    }
    // ... and only then measure the rear candidates from that start, so
    // that no offset measured from another position is mixed in.
    int nearest = length;
    for (String r : rears) {
        final int hit = target.indexOf(r, contentStart);
        if (hit < 0) {
            continue;
        }
        final int offset = hit - contentStart;
        if (offset < nearest) {
            nearest = offset;
        }
    }
    if (nearest <= 0 || nearest >= length) {
        return null;
    }
    return target.substring(contentStart, contentStart + nearest);
}
/**
 * Drop the stopwords at the end of a query. Stopwords in front of the
 * last non-stopword are kept.
 *
 * @param query
 *            The query words, e.g. "fong foo fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The query without trailing stopwords, e.g. "fong foo fong
 *         bar"; an empty array if the query holds stopwords only.
 */
public static String[] removeLastStopwords(String[] query,
        String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    boolean contentSeen = false;
    int idx = query.length;
    // Walk backwards; words are kept from the first non-stopword on.
    while (--idx >= 0) {
        final String candidate = query[idx];
        if (!isStringInArray(candidate, stopwords)) {
            contentSeen = true;
        }
        if (contentSeen) {
            kept.add(0, candidate);
        }
    }
    return vectorToStringArray(kept);
}
/**
 * Drop the stopwords at the beginning of a query. Stopwords after the
 * first non-stopword are kept.
 *
 * @param query
 *            The query words, e.g. "fong foo fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The query without leading stopwords, e.g. "foo fong bar fong";
 *         an empty array if the query holds stopwords only.
 */
public static String[] removeFirstStopwords(String[] query,
        String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    boolean contentSeen = false;
    for (int idx = 0; idx < query.length; idx++) {
        final String candidate = query[idx];
        if (!isStringInArray(candidate, stopwords)) {
            contentSeen = true;
        }
        if (contentSeen) {
            kept.add(candidate);
        }
    }
    return vectorToStringArray(kept);
}
/**
 * Drop the stopwords at both ends of a query by applying
 * removeFirstStopwords and then removeLastStopwords.
 *
 * @param query
 *            The query words, e.g. "fong foo fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The query without leading and trailing stopwords, e.g. "foo
 *         fong bar"
 */
public static String[] removeOuterStopwords(String[] query,
        String[] stopwords) {
    return removeLastStopwords(removeFirstStopwords(query, stopwords),
            stopwords);
}
/**
 * Remove all stopwords from a query; the comparison is case sensitive.
 *
 * @param query
 *            The query, e.g. "foo fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The result, e.g. "foo bar"
 */
public static String[] removeStopwords(String[] query, String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    for (int idx = 0; idx < query.length; idx++) {
        final String candidate = query[idx];
        if (isStringInArray(candidate, stopwords)) {
            continue;
        }
        kept.add(candidate);
    }
    return vectorToStringArray(kept);
}
/**
 * Remove all stopwords from the whitespace separated words of a query;
 * the comparison is case sensitive.
 *
 * @param query
 *            The query, e.g. "foo Fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The result, e.g. "foo Fong bar"
 */
public static String[] removeStopwords(String query, String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    for (StringTokenizer words = new StringTokenizer(query); words.hasMoreTokens();) {
        final String candidate = words.nextToken();
        if (isStringInArray(candidate, stopwords)) {
            continue;
        }
        kept.add(candidate);
    }
    return vectorToStringArray(kept);
}
/**
 * Remove all stopwords from the whitespace separated words of a query,
 * ignoring case.
 *
 * @param query
 *            The query, e.g. "foo Fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The result, e.g. "foo bar"
 */
public static String[] removeStopwordsIgnoreCase(String query,
        String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    for (StringTokenizer words = new StringTokenizer(query); words.hasMoreTokens();) {
        final String candidate = words.nextToken();
        if (isStringInArrayIgnoreCase(candidate, stopwords)) {
            continue;
        }
        kept.add(candidate);
    }
    return vectorToStringArray(kept);
}
/**
 * Remove all stopwords from a query, ignoring case.
 *
 * @param query
 *            The query, e.g. "foo Fong bar fong"
 * @param stopwords
 *            The stopwords, e.g. "fong"
 * @return The result, e.g. "foo bar"
 */
public static String[] removeStopwordsIgnoreCase(String[] query,
        String[] stopwords) {
    final Vector<String> kept = new Vector<String>();
    for (int idx = 0; idx < query.length; idx++) {
        final String candidate = query[idx];
        if (isStringInArrayIgnoreCase(candidate, stopwords)) {
            continue;
        }
        kept.add(candidate);
    }
    return vectorToStringArray(kept);
}
/**
 * Insert a String sequence before every occurrence of a trigger String
 * inside a target String. Occurrences are searched from left to right
 * and do not overlap; the inserted text itself is never searched.
 *
 * @param insertion
 *            The String to insert before the key; must not be null.
 * @param key
 *            The key or trigger String; must not be null. An empty key
 *            matches nothing.
 * @param target
 *            The target String containing the key; must not be null.
 * @return The result String; the target itself if the key is empty or
 *         does not occur.
 */
public static String insertBefore(String insertion, String key,
        String target) {
    // An empty key is found at every position and would never terminate.
    if (key.length() == 0) {
        return target;
    }
    int hit = target.indexOf(key);
    if (hit < 0) {
        return target;
    }
    final StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (hit > -1) {
        out.append(target, copiedUpTo, hit).append(insertion);
        copiedUpTo = hit;
        hit = target.indexOf(key, hit + key.length());
    }
    out.append(target, copiedUpTo, target.length());
    return out.toString();
}
/**
 * Insert a String sequence before every occurrence of a trigger String
 * inside a target String, ignoring case. Matching is done on
 * toLowerCase() copies of target and key, and the offsets found there
 * are applied to the target; this assumes that lower-casing does not
 * change the length of the target. The inserted text itself is never
 * searched.
 *
 * @param insertion
 *            The String to insert before the key; must not be null.
 * @param key
 *            The key or trigger String; must not be null. An empty key
 *            matches nothing.
 * @param target
 *            The target String containing the key; must not be null.
 * @return The result String; the target itself if the key is empty or
 *         does not occur.
 */
public static String insertBeforeIgnoreCase(String insertion, String key,
        String target) {
    // An empty key is found at every position and would never terminate.
    if (key.length() == 0) {
        return target;
    }
    // Lower-case both sides once instead of once per loop iteration.
    final String haystack = target.toLowerCase();
    final String needle = key.toLowerCase();
    int hit = haystack.indexOf(needle);
    if (hit < 0) {
        return target;
    }
    final StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (hit > -1) {
        out.append(target, copiedUpTo, hit).append(insertion);
        copiedUpTo = hit;
        hit = haystack.indexOf(needle, hit + key.length());
    }
    out.append(target, copiedUpTo, target.length());
    return out.toString();
}
/**
 * Insert a String sequence after every occurrence of a trigger String
 * inside a target String. Occurrences are searched from left to right
 * and do not overlap; the inserted text itself is never searched, so an
 * insertion that contains the key is handled like any other.
 *
 * @param insertion
 *            The String to insert after the key; must not be null.
 * @param key
 *            The key or trigger String; must not be null. An empty key
 *            matches nothing.
 * @param target
 *            The target String containing the key; must not be null.
 * @return The result String; the target itself if the key is empty or
 *         does not occur.
 */
public static String insertAfter(String insertion, String key, String target) {
    // An empty key is found at every position and would never terminate.
    if (key.length() == 0) {
        return target;
    }
    int hit = target.indexOf(key);
    if (hit < 0) {
        return target;
    }
    final StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (hit > -1) {
        final int end = hit + key.length();
        out.append(target, copiedUpTo, end).append(insertion);
        copiedUpTo = end;
        // Continue behind the key in the ORIGINAL text, never inside the
        // text that was just inserted.
        hit = target.indexOf(key, end);
    }
    out.append(target, copiedUpTo, target.length());
    return out.toString();
}
/**
 * Insert a String sequence after every occurrence of a trigger String
 * inside a target String, ignoring case. Matching is done on
 * toLowerCase() copies of target and key, and the offsets found there
 * are applied to the target; this assumes that lower-casing does not
 * change the length of the target. The inserted text itself is never
 * searched, so an insertion that contains the key is handled like any
 * other.
 *
 * @param insertion
 *            The String to insert after the key; must not be null.
 * @param key
 *            The key or trigger String; must not be null. An empty key
 *            matches nothing.
 * @param target
 *            The target String containing the key; must not be null.
 * @return The result String; the target itself if the key is empty or
 *         does not occur.
 */
public static String insertAfterIgnoreCase(String insertion, String key,
        String target) {
    // An empty key is found at every position and would never terminate.
    if (key.length() == 0) {
        return target;
    }
    // Lower-case both sides once instead of once per loop iteration.
    final String haystack = target.toLowerCase();
    final String needle = key.toLowerCase();
    int hit = haystack.indexOf(needle);
    if (hit < 0) {
        return target;
    }
    final StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (hit > -1) {
        final int end = hit + key.length();
        out.append(target, copiedUpTo, end).append(insertion);
        copiedUpTo = end;
        // Continue behind the key in the ORIGINAL text, never inside the
        // text that was just inserted.
        hit = haystack.indexOf(needle, end);
    }
    out.append(target, copiedUpTo, target.length());
    return out.toString();
}
/**
 * Count the occurrences of a String in another String. Overlapping
 * occurrences are counted, e.g. "aa" occurs twice in "aaa".
 *
 * @param input
 *            The containing String; must not be null.
 * @param token
 *            The contained String; must not be null. An empty token is
 *            counted as occurring 0 times.
 * @return The number.
 */
public static int countTokens(String input, String token) {
    // An empty token is found at every position and would never terminate.
    if (token.length() == 0) {
        return 0;
    }
    int count = 0;
    for (int pos = input.indexOf(token); pos > -1; pos = input.indexOf(token, pos + 1)) {
        count++;
    }
    return count;
}
/**
 * Return a list of strings in a single string, with a separator string
 * between neighbouring elements.
 *
 * @param list
 *            The list; may be null or empty.
 * @param separator
 *            The separator.
 * @return The new String, e.g. "a, a"; "" for an empty list and null for
 *         a null list.
 */
public static String stringList2String(List<String> list, String separator) {
    if (list == null) {
        return null;
    }
    final StringBuilder joined = new StringBuilder();
    final Iterator<String> elements = list.iterator();
    while (elements.hasNext()) {
        joined.append(elements.next());
        // Separator only between elements, so nothing has to be cut off
        // afterwards and an empty list simply yields "".
        if (elements.hasNext()) {
            joined.append(separator);
        }
    }
    return joined.toString();
}
/**
 * Retrieve the parts of a String array as a single String, with a
 * separator between neighbouring elements.
 *
 * @param stringArray
 *            The string array, e.g. {"a", "a"}; may be null or empty.
 * @param separator
 *            The separator, e.g. ", "
 * @return The String, e.g. "a, a"; "" for an empty array and null for a
 *         null array.
 */
public static String stringArray2String(String[] stringArray,
        String separator) {
    if (stringArray == null) {
        return null;
    }
    final StringBuilder joined = new StringBuilder();
    for (int i = 0; i < stringArray.length; i++) {
        // Separator only between elements, so nothing has to be cut off
        // afterwards and an empty array simply yields "".
        if (i > 0) {
            joined.append(separator);
        }
        joined.append(stringArray[i]);
    }
    return joined.toString();
}
/**
 * Count the occurrences of a String in another String ignoring case.
 * <p>
 * Occurrences are counted from left to right and do not overlap, e.g. "AA"
 * occurs once in "aaa".
 *
 * @param input
 *            The containing String. Must not be null.
 * @param token
 *            The contained String. Must not be null. An empty token is
 *            counted as occurring zero times.
 * @return The number of occurrences.
 */
public static int countTokensIgnoreCase(String input, String token) {
    final int inputLen = input.length();
    final int tokenLen = token.length();
    if (tokenLen == 0) {
        // An empty token matches everywhere without advancing the scan.
        return 0;
    }
    int count = 0;
    int pos = 0;
    while (pos <= inputLen - tokenLen) {
        // Compare in place rather than lower-casing both strings on every
        // step; this also avoids any dependence on the default locale.
        if (input.regionMatches(true, pos, token, 0, tokenLen)) {
            count++;
            pos += tokenLen;
        } else {
            pos++;
        }
    }
    return count;
}
/**
 * Replace all occurrences of one string by another.
 * <p>
 * Both strings are taken literally (no regular expressions). Occurrences
 * are replaced from left to right; replaced text is not scanned again, so
 * the replacement may contain the original.
 *
 * @param input
 *            The input, e.g. "bar foof bla foo bong." If null, null is
 *            returned.
 * @param orig
 *            The String to be replaced, e.g. "foo". Must not be null. If
 *            empty, the input is returned unchanged.
 * @param replace
 *            The replacement, e.g. "foobar". Must not be null.
 * @return The result, e.g. "bar foobarf bla foobar bong."
 * @throws NullPointerException
 *             if orig or replace is null.
 */
public static String replaceAll(String input, String orig, String replace) {
    if (orig == null || replace == null) {
        throw new NullPointerException("orig and replace must not be null");
    }
    if (input == null || orig.length() == 0) {
        // An empty search string would match at every position; there is
        // nothing meaningful to replace.
        return input;
    }
    return input.replace(orig, replace);
}
/**
 * Main program used for ad-hoc testing. Prints the result of a single
 * {@code replaceAll} call to standard output.
 *
 * @param args
 *            Command line arguments (not used).
 */
public static void main(String[] args) {
    // The replacement contains the search string, so this checks that
    // replaced text is not scanned again.
    System.out.println(replaceAll("1", "1", "1,1er"));
}
}