/*
* StringUtil.java
*
* Copyright (c) 2005-2009 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under GNU Public License.
*/
package wikokit.base.wikipedia.util;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
//import java.util.Comparator;
//import java.io.UnsupportedEncodingException;
/**
 * Static helpers for joining, splitting, escaping and slicing strings,
 * plus small set-like operations on string lists and arrays.
 *
 * The class is not instantiable; all methods are static.
 */
public final class StringUtil {

    private StringUtil() {}

    private static final String NULL_STRING = "";
    private static final String[] NULL_STRING_ARRAY = new String[0];

    /**
     * Characters that terminate a word in
     * {@link #getTextTillSpaceOrPuctuationMark(int, String)}: brackets, colon,
     * comma, dashes, ellipsis, exclamation mark, full stop, hyphens,
     * interrobang, question mark, typographic quotation marks, slash and
     * interpunct (source: http://en.wiktionary.org/wiki/punctuation_mark).
     *
     * NOTE(review): the ASCII apostrophe and the semicolon are not in this
     * set; confirm whether that omission is intentional.
     */
    private static final String PUNCTUATION_MARKS = "()[]{}〈〉:,‒–—―…!.-‐‽?‘’“”/·";

    /**
     * Concatenates the strings, placing {@code token} between neighbours.
     *
     * @param token   separator inserted between elements
     * @param strings elements to join
     * @return the joined text, or an empty string if {@code strings} is
     *         null or empty
     */
    public static String join(String token, String[] strings) {
        if (null == strings || 0 == strings.length) {
            return NULL_STRING;
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < strings.length; i++) {
            if (i > 0) {
                sb.append(token);
            }
            sb.append(strings[i]);
        }
        return sb.toString();
    }

    /**
     * Concatenates the decimal form of the numbers, placing {@code token}
     * between neighbours.
     *
     * @param token  separator inserted between elements
     * @param source numbers to join
     * @return the joined text, or an empty string if {@code source} is
     *         null or empty
     */
    public static String join(String token, int[] source) {
        if (null == source || 0 == source.length) {
            return NULL_STRING;
        }
        // StringBuilder avoids re-copying the accumulated text on every step.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < source.length; i++) {
            if (i > 0) {
                sb.append(token);
            }
            sb.append(source[i]);
        }
        return sb.toString();
    }

    /**
     * Splits {@code s} at every occurrence of the literal (non-regex)
     * separator {@code token}. Empty pieces are kept, so a trailing
     * separator yields a trailing empty string.
     *
     * @param token literal separator
     * @param s     text to split
     * @return the pieces; an empty array if {@code s} is null or empty;
     *         a one-element array holding {@code s} if {@code token} is
     *         null, empty, or does not occur in {@code s}
     */
    public static String[] split(String token, String s) {
        if (null == s || 0 == s.length()) {
            return NULL_STRING_ARRAY;
        }
        // An empty separator matches at every index without advancing,
        // which would never terminate; treat it as "no separator".
        if (null == token || 0 == token.length()) {
            return new String[] { s };
        }
        List<String> parts = new ArrayList<String>();
        int start = 0;
        int loc = s.indexOf(token, start);
        while (-1 != loc) {
            parts.add(s.substring(start, loc));
            start = loc + token.length();
            loc = s.indexOf(token, start);
        }
        parts.add(s.substring(start));
        return parts.toArray(new String[parts.size()]);
    }

    /**
     * Prefixes every double quote, single quote and backslash with a
     * backslash; all other characters are copied unchanged.
     *
     * @param text text to escape
     * @return the escaped text; an empty string (after printing a message
     *         to standard output) if {@code text} is null
     */
    public static String escapeChars(String text) {
        if (null == text) {
            System.out.println("Error in StringUtil.escapeChars(), argument is null.");
            return NULL_STRING;
        }
        StringBuilder result = new StringBuilder(text.length() + 8);
        // Index-based iteration: a sentinel-based iterator would stop early
        // on a U+FFFF character inside the text.
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if ('"' == c || '\'' == c || '\\' == c) {
                result.append('\\');
            }
            result.append(c);
        }
        return result.toString();
    }

    /**
     * Escapes dollar signs and backslashes.
     *
     * The text is scanned left to right. A backslash is held back until the
     * next character is seen:
     * <ul>
     * <li>backslash followed by "$" is copied unchanged (the dollar sign
     *     is already escaped);</li>
     * <li>backslash followed by another backslash yields four
     *     backslashes (each one doubled);</li>
     * <li>backslash followed by any other character yields two
     *     backslashes and that character;</li>
     * <li>a backslash at the very end yields two backslashes.</li>
     * </ul>
     * A "$" that is not consumed by a held-back backslash gets a single
     * backslash in front of it. Everything else is copied unchanged.
     *
     * @param text text to escape
     * @return a new buffer with the escaped text; a new empty buffer (after
     *         printing a message to standard output) if {@code text} is null
     */
    public static StringBuffer escapeCharDollarAndBackslash(String text) {
        if (null == text) {
            System.out.println("Error in StringUtil.escapeCharDollarAndBackslash(), argument is null.");
            // A fresh buffer each time: the result is mutable and must not
            // be shared between callers.
            return new StringBuffer();
        }
        StringBuffer result = new StringBuffer(text.length() + 8);
        boolean pendingBackslash = false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (pendingBackslash) {
                if ('$' == c) {
                    result.append("\\$");
                } else {
                    result.append("\\\\");
                    if ('\\' == c) {
                        result.append("\\\\");
                    } else {
                        result.append(c);
                    }
                }
                pendingBackslash = false;
            } else if ('$' == c) {
                result.append("\\$");
            } else if ('\\' == c) {
                // Output is decided once the following character is known.
                pendingBackslash = true;
            } else {
                result.append(c);
            }
        }
        if (pendingBackslash) {
            result.append("\\\\");
        }
        return result;
    }

    /** Substitutes every space by the underscore character. */
    public static String spaceToUnderscore(String text) {
        return text.replace(' ', '_');
    }

    /** Substitutes every underscore by the space character. */
    public static String underscoreToSpace(String text) {
        return text.replace('_', ' ');
    }

    /**
     * Reports whether {@code list} holds an element equal to {@code wanted}
     * ignoring case; a null element only matches a null {@code wanted}.
     */
    private static boolean listContainsIgnoreCase(List<String> list, String wanted) {
        for (String item : list) {
            boolean same = (null == item) ? (null == wanted) : item.equalsIgnoreCase(wanted);
            if (same) {
                return true;
            }
        }
        return false;
    }

    /**
     * Gets the list of unique strings (case insensitive). The first
     * occurrence of each string is kept, in the original order.
     *
     * @param l source list, may be null
     * @return a new list; empty if {@code l} is null
     */
    public static List<String> getUnique(List<String> l) {
        List<String> result = new ArrayList<String>();
        if (null == l) {
            return result;
        }
        for (String s : l) {
            if (!listContainsIgnoreCase(result, s)) {
                result.add(s);
            }
        }
        return result;
    }

    /**
     * Merges two lists into one list of unique strings (case insensitive):
     * the unique elements of {@code a} followed by those elements of
     * {@code b} that are not yet present.
     *
     * @param a first list, may be null
     * @param b second list, may be null
     * @return a new list owned by the caller; empty if both inputs are
     *         null or empty
     */
    public static List<String> addOR(List<String> a, List<String> b) {
        List<String> result = getUnique(a);
        if (null != b) {
            for (String word : b) {
                if (!listContainsIgnoreCase(result, word)) {
                    result.add(word);
                }
            }
        }
        return result;
    }

    /**
     * Merges two arrays into one array of unique strings (case sensitive):
     * the elements of {@code a} followed by the elements of {@code b},
     * each distinct string kept once at its first position.
     *
     * @param a first array, may be null
     * @param b second array, may be null
     * @return a new array; empty if both inputs are null or empty
     */
    public static String[] addORCaseSensitive(String[] a, String[] b) {
        // LinkedHashSet gives exact-match uniqueness and keeps first-seen order.
        Set<String> unique = new LinkedHashSet<String>();
        if (null != a) {
            unique.addAll(Arrays.asList(a));
        }
        if (null != b) {
            unique.addAll(Arrays.asList(b));
        }
        return unique.toArray(new String[unique.size()]);
    }

    /**
     * Returns true if array {@code ar} contains string {@code wanted},
     * ignoring case.
     *
     * @param ar     array to search, may be null
     * @param wanted string to look for
     * @return false if {@code ar} or {@code wanted} is null or no element
     *         matches
     */
    public static boolean containsIgnoreCase(String[] ar, String wanted) {
        if (null == ar || null == wanted) {
            return false;
        }
        for (String s : ar) {
            if (wanted.equalsIgnoreCase(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the intersection of two arrays: the elements of
     * {@code list2} (in their order, repeats included) that also occur in
     * {@code list1}. Comparison is case sensitive.
     *
     * @return the common elements, or null if either array is null or empty
     */
    public static String[] intersect(String[] list1, String[] list2) {
        if (null == list1 || 0 == list1.length
                || null == list2 || 0 == list2.length) {
            return null;
        }
        Set<String> first = new HashSet<String>(Arrays.asList(list1));
        List<String> common = new ArrayList<String>();
        for (String s : list2) {
            if (first.contains(s)) {
                common.add(s);
            }
        }
        return common.toArray(new String[common.size()]);
    }

    /**
     * Gets the substring before the first occurrence of the character
     * {@code ch}.
     *
     * @return an empty string if {@code s} is null; the whole of {@code s}
     *         if {@code ch} does not occur
     */
    private static String getTextBeforeFirstChar(String s, int ch) {
        if (null == s) {
            return NULL_STRING;
        }
        int i = s.indexOf(ch);
        return (-1 == i) ? s : s.substring(0, i);
    }

    /** Gets the text before the first colon ":" (the whole text if none). */
    public static String getTextBeforeFirstColumn(String s) {
        return getTextBeforeFirstChar(s, ':');
    }

    /** Gets the text before the first vertical pipe "|" (the whole text if none). */
    public static String getTextBeforeFirstVerticalPipe(String s) {
        return getTextBeforeFirstChar(s, '|');
    }

    /**
     * Gets the substring after the first occurrence of the character
     * {@code ch}.
     *
     * @return an empty string if {@code s} is null or {@code ch} does not
     *         occur
     */
    private static String getTextAfterFirstChar(String s, int ch) {
        if (null == s) {
            return NULL_STRING;
        }
        int i = s.indexOf(ch);
        return (-1 == i) ? NULL_STRING : s.substring(i + 1);
    }

    /** Gets the text after the first colon ":" (empty if none). */
    public static String getTextAfterFirstColumn(String s) {
        return getTextAfterFirstChar(s, ':');
    }

    /** Gets the text after the first vertical pipe "|" (empty if none). */
    public static String getTextAfterFirstVerticalPipe(String s) {
        return getTextAfterFirstChar(s, '|');
    }

    /** Gets the text after the first space (empty if none). */
    public static String getTextAfterFirstSpace(String s) {
        return getTextAfterFirstChar(s, ' ');
    }

    /**
     * Gets the text between the first and the second colon ":".
     *
     * @return an empty string if {@code s} is null; the whole of {@code s}
     *         if it has no colon; everything after the colon if it has
     *         exactly one
     */
    public static String getTextBeforeFirstAndSecondColumns(String s) {
        if (null == s) {
            return NULL_STRING;
        }
        int first = s.indexOf(':');
        if (-1 == first) {
            return s;
        }
        int second = s.indexOf(':', first + 1);
        if (-1 == second) {
            return s.substring(first + 1);
        }
        return s.substring(first + 1, second);
    }

    /**
     * Gets text from position {@code pos} till the first whitespace or
     * punctuation mark (exclusive), or till the end of the string.
     *
     * @return an empty string if {@code s} is null or {@code pos} is
     *         outside the string
     */
    public static String getTextTillSpaceOrPuctuationMark(int pos, String s) {
        if (null == s || pos >= s.length() || pos < 0) {
            return NULL_STRING;
        }
        int end = pos;
        while (end < s.length()) {
            char ch = s.charAt(end);
            if (Character.isWhitespace(ch) || -1 != PUNCTUATION_MARKS.indexOf(ch)) {
                break;
            }
            end++;
        }
        return s.substring(pos, end);
    }

    /** Returns true if the third character is a colon, e.g. "ru:test". */
    public static boolean isInterWiki(String title) {
        if (null == title || 3 > title.length()) {
            return false;
        }
        return ':' == title.charAt(2);
    }

    /**
     * Converts a two-letter word: first letter to upper case, second letter
     * to lower case. Any other input (null or a different length) is
     * returned unchanged. Case mapping is locale independent.
     */
    public static String UpperFirstLowerSecondLetter(String s) {
        if (null == s || 2 != s.length()) {
            return s;
        }
        return s.substring(0, 1).toUpperCase(Locale.ROOT)
             + s.substring(1, 2).toLowerCase(Locale.ROOT);
    }

    /**
     * Converts the first letter to upper case (capitalization - good for
     * WP, bad for Wiktionary). Null and empty input is returned unchanged.
     * Case mapping is locale independent.
     */
    public static String UpperFirstLetter(String s) {
        if (null == s || s.length() < 1) {
            return s;
        }
        return s.substring(0, 1).toUpperCase(Locale.ROOT) + s.substring(1);
    }

    /**
     * Trims each element of the array in place.
     *
     * @param s array to trim; null elements are left as they are
     * @return the same array instance (null if {@code s} is null)
     */
    public static String[] trim(String[] s) {
        if (null == s) {
            return null;
        }
        for (int i = 0; i < s.length; i++) {
            if (null != s[i]) {
                s[i] = s[i].trim();
            }
        }
        return s;
    }

    /**
     * Replaces special whitespace, namely the no-break space U+00A0, by an
     * ordinary space.
     *
     * NOTE(review): the previous literals were visually indistinguishable
     * from a plain space; they may originally have been HTML entity forms
     * of the no-break space. Entity forms are not handled here - confirm
     * against the callers whether they should be.
     *
     * @param str text to clean, may be null
     * @return the cleaned text (null if {@code str} is null)
     */
    public static String replaceSpecialChars(String str) {
        if (null == str) {
            return null;
        }
        return str.replace('\u00A0', ' ');
    }
}