/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.utilities;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Pattern;
import java.util.zip.DataFormatException;
/**
 * Static helpers for joining, classifying, escaping, tokenising, hashing and
 * date-formatting strings.
 *
 * <p>All methods are stateless; none of them keeps shared mutable state, so they can be
 * called from several threads at once.
 */
public class StringUtilities {

  /** Charset used when hashing, so digests do not depend on the platform default encoding. */
  private static final Charset DIGEST_CHARSET = Charset.forName("UTF-8");

  /** Upper-case hexadecimal digits used to render digests. */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

  /** Characters that {@link #escape(String)} prefixes with a backslash. */
  private static final String ESCAPED_CHARS = "\"?;\\|";

  /**
   * Permissive number shape: optional minus, a digit, then optionally any mix of digits,
   * dots and commas, an optional {@code E}, an optional minus, and a final digit.
   */
  private static final Pattern NUMBER_PATTERN =
      Pattern.compile("^-?\\d([0-9.,]*E?-?[0-9]*\\d)?$");

  /** Roman numerals one to ten. */
  private static final Set<String> ROMAN_NUMERALS = new HashSet<String>(Arrays.asList(
      "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"));

  /** Lower-case English names of the Greek letters. */
  private static final Set<String> GREEK_LETTERS = new HashSet<String>(Arrays.asList(
      "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
      "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi",
      "rho", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"));

  /**
   * Joins the string forms of the given objects with {@code ", "}.
   *
   * @param objects the elements to join, in iteration order
   * @return the joined string; empty when the collection is empty
   */
  public static String commaSeparatedString(Collection<? extends Object> objects) {
    return join(objects, ", ");
  }

  /**
   * Sorts a copy of the collection in natural order and joins it with the delimiter.
   *
   * @param s the elements to sort and join; the collection itself is not modified
   * @param delimiter the text placed between consecutive elements
   * @return the joined string
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  public static String joinSorted(Collection<? extends Comparable> s, String delimiter) {
    List sorted = new ArrayList(s);
    Collections.sort(sorted);
    return join(sorted, delimiter);
  }

  /**
   * Joins the string forms of the elements with the delimiter. A {@code null} element is
   * rendered as {@code "null"}.
   *
   * @param s the elements to join, in iteration order
   * @param delimiter the text placed between consecutive elements
   * @return the joined string; empty when the collection is empty
   */
  public static String join(Collection<?> s, String delimiter) {
    StringBuilder buffer = new StringBuilder();
    Iterator<?> iter = s.iterator();
    while (iter.hasNext()) {
      buffer.append(iter.next());
      if (iter.hasNext()) {
        buffer.append(delimiter);
      }
    }
    return buffer.toString();
  }

  /**
   * Joins the string forms of the array elements with the delimiter. A {@code null}
   * element is rendered as {@code "null"}.
   *
   * @param objects the elements to join, in array order
   * @param delimiter the text placed between consecutive elements
   * @return the joined string; empty when the array is empty
   */
  public static String join(Object[] objects, String delimiter) {
    StringBuilder buffer = new StringBuilder();
    for (int i = 0; i < objects.length; i++) {
      if (i > 0) {
        buffer.append(delimiter);
      }
      buffer.append(objects[i]);
    }
    return buffer.toString();
  }

  /**
   * Parses the two characters starting at {@code index} as a hexadecimal number.
   *
   * @param value the text containing the hex digits
   * @param index the position of the first hex digit
   * @return the parsed value
   * @throws NumberFormatException if the two characters are not hexadecimal
   * @throws StringIndexOutOfBoundsException if fewer than two characters are available
   */
  public static int twoHexDigitsToInt(String value, int index) {
    return Integer.parseInt(value.substring(index, index + 2), 16);
  }

  /**
   * Tells whether {@link Integer#parseInt(String)} accepts the string.
   *
   * @param string the text to test
   * @return {@code true} if the text parses as an {@code int}
   */
  public static boolean isInteger(String string) {
    try {
      Integer.parseInt(string);
      return true;
    } catch (NumberFormatException e) {
      return false;
    }
  }

  /**
   * Tells whether the string, ignoring surrounding whitespace, looks like a number.
   * The check is deliberately permissive: digits may be mixed with dots and commas and
   * may carry an {@code E} exponent (for example {@code "1,234.5"} or {@code "1.2E-3"}).
   *
   * @param string the text to test
   * @return {@code true} if the trimmed text has the shape of a number
   */
  public static boolean isNumber(String string) {
    // String.trim() returns a new string; the result has to be used.
    String trimmed = string.trim();
    if (trimmed.length() == 1) {
      return Character.isDigit(trimmed.charAt(0));
    }
    return NUMBER_PATTERN.matcher(trimmed).matches();
  }

  /**
   * Tells whether the string is one of the Roman numerals I to X (upper case only).
   *
   * @param string the text to test
   * @return {@code true} for {@code "I"} through {@code "X"}
   */
  public static boolean isRomanNumeral(String string) {
    return ROMAN_NUMERALS.contains(string);
  }

  /**
   * Tells whether the string is the English name of a Greek letter, ignoring case.
   *
   * @param string the text to test
   * @return {@code true} for names such as {@code "alpha"} or {@code "Omega"}
   */
  public static boolean isGreekLetter(String string) {
    // Fixed locale: the default-locale mapping would turn "I" into a dotless i under a
    // Turkish locale and make "IOTA" or "PI" fail to match.
    return GREEK_LETTERS.contains(string.toLowerCase(Locale.ENGLISH));
  }

  /**
   * Adds PSF file specific escape characters to a string: each of
   * {@code " ? ; \ |} is prefixed with a backslash.
   * Author: Martijn
   *
   * @param string the raw text
   * @return the escaped text
   */
  public static String escape(String string) {
    StringBuilder result = new StringBuilder(string.length() + 8);
    for (int i = 0; i < string.length(); i++) {
      char c = string.charAt(i);
      if (ESCAPED_CHARS.indexOf(c) != -1) {
        result.append('\\');
      }
      result.append(c);
    }
    return result.toString();
  }

  /**
   * Removes escape characters from a string. A string wrapped in double quotes is
   * returned without the quotes and otherwise untouched; in any other string each
   * backslash is dropped and the character following it is kept literally.
   * Author: Martijn
   *
   * @param string the escaped text
   * @return the unescaped text
   */
  public static String unescape(String string) {
    int length = string.length();
    // At least two characters are needed for a quote pair; a lone '"' is ordinary text.
    if (length >= 2 && string.charAt(0) == '"' && string.charAt(length - 1) == '"') {
      return string.substring(1, length - 1);
    }
    StringBuilder result = new StringBuilder(length);
    boolean escaped = false;
    for (int i = 0; i < length; i++) {
      char c = string.charAt(i);
      if (!escaped && c == '\\') {
        escaped = true;
      } else {
        result.append(c);
        escaped = false;
      }
    }
    return result.toString();
  }

  /**
   * Splits like a plain split on {@code divider}, except that a divider preceded by an
   * unescaped backslash, or located between double quotes, does not split. The pieces
   * are returned as found (escapes and quotes are not removed). Always returns at least
   * one element.
   * Author: Martijn
   *
   * @param string the text to split
   * @param divider the separator character
   * @return the pieces, in order
   */
  public static List<String> safeSplit(String string, char divider) {
    List<String> result = new ArrayList<String>();
    boolean literal = false;
    boolean escape = false;
    int startpos = 0;
    for (int i = 0; i < string.length(); i++) {
      char c = string.charAt(i);
      // NOTE(review): a backslash-escaped quote also toggles the literal state here;
      // confirm against the PSF format whether that is intended.
      if (c == '"') {
        literal = !literal;
      }
      if (!literal && !escape && c == divider) {
        result.add(string.substring(startpos, i));
        startpos = i + 1;
      }
      // A backslash escapes the next character unless it is itself escaped.
      escape = (c == '\\') && !escape;
    }
    result.add(string.substring(startpos));
    return result;
  }

  /** Tells whether the character is one of the ASCII digits 0-9. */
  private static boolean isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
  }

  /** Tells whether the character is one of the ASCII letters A-Z. */
  private static boolean isAsciiUpperCase(char c) {
    return c >= 'A' && c <= 'Z';
  }

  /** Tells whether the character is one of the ASCII letters a-z. */
  private static boolean isAsciiLowerCase(char c) {
    return c >= 'a' && c <= 'z';
  }

  /**
   * Tells whether the string contains at least one ASCII digit.
   *
   * @param string the text to scan
   * @return {@code true} if a character 0-9 occurs
   */
  public static boolean containsNumber(String string) {
    for (int i = 0; i < string.length(); i++) {
      if (isAsciiDigit(string.charAt(i))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Counts the ASCII digits in the string.
   *
   * @param string the text to scan
   * @return the number of characters 0-9
   */
  public static int countNumbers(String string) {
    int total = 0;
    for (int i = 0; i < string.length(); i++) {
      if (isAsciiDigit(string.charAt(i))) {
        total++;
      }
    }
    return total;
  }

  /**
   * Tells whether the string contains at least one letter
   * (as defined by {@link Character#isLetter(char)}).
   *
   * @param string the text to scan
   * @return {@code true} if a letter occurs
   */
  public static boolean containsLetter(String string) {
    for (int i = 0; i < string.length(); i++) {
      if (Character.isLetter(string.charAt(i))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Counts the letters (as defined by {@link Character#isLetter(char)}) in the string.
   *
   * @param string the text to scan
   * @return the number of letters
   */
  public static int countLetters(String string) {
    int total = 0;
    for (int i = 0; i < string.length(); i++) {
      if (Character.isLetter(string.charAt(i))) {
        total++;
      }
    }
    return total;
  }

  /**
   * Tells whether the string contains <code>{</code> or <code>}</code>.
   *
   * @param string the text to scan
   * @return {@code true} if a curly bracket occurs
   */
  public static boolean containsCurlyBracket(String string) {
    return string.indexOf('{') != -1 || string.indexOf('}') != -1;
  }

  /**
   * Tells whether the string contains {@code (} or {@code )}.
   *
   * @param string the text to scan
   * @return {@code true} if a parenthesis occurs
   */
  public static boolean containsParenthesis(String string) {
    return string.indexOf('(') != -1 || string.indexOf(')') != -1;
  }

  /**
   * Tells whether the string contains {@code [} or {@code ]}.
   *
   * @param string the text to scan
   * @return {@code true} if a square bracket occurs
   */
  public static boolean containsBracket(String string) {
    return string.indexOf('[') != -1 || string.indexOf(']') != -1;
  }

  /**
   * Tells whether the string contains {@code <} or {@code >}.
   *
   * @param string the text to scan
   * @return {@code true} if an angle bracket occurs
   */
  public static boolean containsArrow(String string) {
    return string.indexOf('<') != -1 || string.indexOf('>') != -1;
  }

  /** Tells whether the character is {@code (} or {@code )}. */
  public static boolean isParenthesis(char ch) {
    return ch == '(' || ch == ')';
  }

  /**
   * Tells whether the character is {@code [} or {@code ]}.
   * Author: Kristina
   */
  public static boolean isBracket(char ch) {
    return ch == '[' || ch == ']';
  }

  /** Tells whether the character is {@code <} or {@code >}. */
  public static boolean isArrow(char ch) {
    return ch == '<' || ch == '>';
  }

  /**
   * Tells whether the character is <code>{</code> or <code>}</code>.
   * Author: Kristina
   */
  public static boolean isCurlyBracket(char ch) {
    return ch == '{' || ch == '}';
  }

  /**
   * Tells whether the character at {@code i} is the apostrophe of a possessive
   * {@code 's}: it follows a letter, is followed by {@code s}, and that {@code s} ends
   * the string or is followed by a character that is neither letter nor digit.
   */
  private static boolean isPossessiveApostrophe(String string, int i) {
    if (string.charAt(i) != '\'') {
      return false;
    }
    if (i == 0 || !Character.isLetter(string.charAt(i - 1))) {
      return false;
    }
    int last = string.length() - 1;
    if (i >= last || string.charAt(i + 1) != 's') {
      return false;
    }
    return i + 1 == last || !Character.isLetterOrDigit(string.charAt(i + 2));
  }

  /**
   * Converts a string to a list of words. Words are maximal runs of letters and digits;
   * the apostrophe of a possessive {@code 's} is kept inside the word.
   * Author: Martijn
   *
   * @param string the text to tokenise
   * @return the words in order of appearance; empty when there are none
   */
  public static List<String> mapToWords(String string) {
    List<String> result = new ArrayList<String>();
    int start = 0;
    int length = string.length();
    for (int i = 0; i < length; i++) {
      boolean wordChar = Character.isLetterOrDigit(string.charAt(i))
          || isPossessiveApostrophe(string, i);
      if (!wordChar) {
        if (start != i) {
          result.add(string.substring(start, i));
        }
        start = i + 1;
      }
    }
    if (start != length) {
      result.add(string.substring(start, length));
    }
    return result;
  }

  /**
   * Returns the current date and time formatted in the default locale's medium style.
   * Author: Martijn
   *
   * @return the formatted current time
   */
  public static String now() {
    DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM);
    return df.format(new Date());
  }

  /**
   * Prints the message to standard output, prefixed with {@link #now()} and a tab.
   *
   * @param message the text to print
   */
  public static void outputWithTime(String message) {
    System.out.println(now() + "\t" + message);
  }

  /**
   * Tells whether the word looks like an abbreviation: it has at least one ASCII
   * upper-case letter and fewer ASCII lower-case than upper-case letters. Digits and all
   * other characters are ignored.
   * Author: Martijn
   *
   * @param word the word to test
   * @return {@code true} if upper-case letters dominate
   */
  public static boolean isAbbr(String word) {
    int lowercase = 0;
    int uppercase = 0;
    for (int i = 0; i < word.length(); i++) {
      char c = word.charAt(i);
      if (isAsciiUpperCase(c)) {
        uppercase++;
      } else if (isAsciiLowerCase(c)) {
        lowercase++;
      }
    }
    return uppercase > 0 && lowercase < uppercase;
  }

  /**
   * Lower-cases the string unless an ASCII upper-case letter occurs after the first
   * character, in which case the original string is returned unchanged.
   *
   * @param string the word to normalise
   * @return the lower-cased word, or the original when it has an inner capital
   */
  public static String firstLetterToLowerCase(String string) {
    for (int i = 1; i < string.length(); i++) {
      if (isAsciiUpperCase(string.charAt(i))) {
        return string;
      }
    }
    return string.toLowerCase();
  }

  /**
   * Counts the ASCII upper-case letters (A-Z) in the string.
   *
   * @param string the text to scan
   * @return the number of upper-case letters
   */
  public static int countsCharactersInUpperCase(String string) {
    int uppercase = 0;
    for (int i = 0; i < string.length(); i++) {
      if (isAsciiUpperCase(string.charAt(i))) {
        uppercase++;
      }
    }
    return uppercase;
  }

  /**
   * Counts the ASCII lower-case letters (a-z) in the string.
   *
   * @param string the text to scan
   * @return the number of lower-case letters
   */
  public static int countsCharactersInLowerCase(String string) {
    int lowercase = 0;
    for (int i = 0; i < string.length(); i++) {
      if (isAsciiLowerCase(string.charAt(i))) {
        lowercase++;
      }
    }
    return lowercase;
  }

  /**
   * Converts a double to a formatted string using a {@link DecimalFormat} pattern.
   * Examples of valid patterns are {@code "###,###.###"}, {@code "###.##"},
   * {@code "000000.000"}, {@code "$###,###.###"} and {@code "\u00a5###,###.###"}.
   * {@code #} indicates an optional digit, {@code 0} a forced digit (printed as 0 when 0).
   * Author: Martijn
   *
   * @param pattern the {@link DecimalFormat} pattern
   * @param number the value to format
   * @return the formatted value
   */
  public static String formatNumber(String pattern, double number) {
    return new DecimalFormat(pattern).format(number);
  }

  /**
   * Tells whether the string ends in a letter followed by a lower-case {@code s}.
   *
   * @param string the word to test
   * @return {@code true} if the word has that plural-like ending
   */
  public static boolean isPlural(String string) {
    int length = string.length();
    return length > 1
        && string.charAt(length - 1) == 's'
        && Character.isLetter(string.charAt(length - 2));
  }

  /**
   * Returns the text between the first occurrence of {@code pre} and the first
   * occurrence of {@code post} after it.
   *
   * @param source the text to search
   * @param pre the marker preceding the wanted text
   * @param post the marker following the wanted text
   * @return the text in between, or an empty string when either marker is missing
   */
  public static String findBetween(String source, String pre, String post) {
    int start = source.indexOf(pre);
    if (start == -1) {
      return "";
    }
    int contentStart = start + pre.length();
    int end = source.indexOf(post, contentStart);
    if (end == -1) {
      return "";
    }
    return source.substring(contentStart, end);
  }

  /**
   * Returns every text found between a {@code pre} marker and the next {@code post}
   * marker. The search for the next {@code pre} resumes at the position of the previous
   * {@code post}, so a {@code post} marker may double as the next {@code pre}.
   *
   * @param source the text to search
   * @param pre the marker preceding each wanted text
   * @param post the marker following each wanted text
   * @return the texts found, in order; empty when there are none
   */
  public static List<String> multiFindBetween(String source, String pre, String post) {
    List<String> result = new ArrayList<String>();
    int searchFrom = 0;
    while (true) {
      int start = source.indexOf(pre, searchFrom);
      if (start == -1) {
        break;
      }
      int contentStart = start + pre.length();
      int end = source.indexOf(post, contentStart);
      if (end == -1) {
        break;
      }
      result.add(source.substring(contentStart, end));
      // With an empty pre the search position may not advance; stop instead of
      // collecting the same match forever.
      if (end == searchFrom) {
        break;
      }
      searchFrom = end;
    }
    return result;
  }

  /**
   * Returns true if every parenthesis in the string is matched.
   *
   * @param string the text to check
   * @return {@code true} if no {@code )} precedes its {@code (} and all are closed
   */
  public static boolean parenthesisMatch(String string) {
    int depth = 0;
    for (int i = 0; i < string.length(); i++) {
      char ch = string.charAt(i);
      if (ch == '(') {
        depth++;
      } else if (ch == ')' && --depth < 0) {
        return false;
      }
    }
    return depth == 0;
  }

  /**
   * Counts the occurrences of a character in a string.
   *
   * @param s the text to scan
   * @param ch the character to count
   * @return the number of occurrences
   */
  public static int count(String s, char ch) {
    int cnt = 0;
    for (int i = 0; i < s.length(); i++) {
      if (s.charAt(i) == ch) {
        cnt++;
      }
    }
    return cnt;
  }

  /**
   * Removes parentheses and what is within them from the string.
   * For example: 'cold (disease)' -> 'cold '. A closing parenthesis without a matching
   * opening one is removed without affecting the text after it.
   *
   * @param string the text to clean
   * @return the text outside any parentheses
   */
  public static String removeParenthesisAndContent(String string) {
    StringBuilder result = new StringBuilder();
    int depth = 0;
    for (int i = 0; i < string.length(); i++) {
      char ch = string.charAt(i);
      if (ch == '(') {
        depth++;
      } else if (ch == ')') {
        // Never go below zero: a stray ')' must not swallow the rest of the string.
        if (depth > 0) {
          depth--;
        }
      } else if (depth == 0) {
        result.append(ch);
      }
    }
    return result.toString();
  }

  /**
   * Creates a fresh, cleared calendar in UTC. A new instance per call keeps the date
   * helpers free of shared mutable state, and UTC makes the field values independent of
   * the default time zone and of daylight-saving transitions.
   */
  private static Calendar newUtcCalendar() {
    Calendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
    calendar.clear();
    return calendar;
  }

  /** Returns a UTC calendar positioned at {@code days * DateUtilities.day} milliseconds. */
  private static Calendar utcCalendarForDays(long days) {
    Calendar calendar = newUtcCalendar();
    calendar.setTimeInMillis(days * DateUtilities.day);
    return calendar;
  }

  /** Appends a non-negative value using at least two digits. */
  private static void appendTwoDigits(StringBuilder sb, int value) {
    if (value < 10) {
      sb.append('0');
    }
    sb.append(value);
  }

  /** Appends year, two-digit month and two-digit day of month of the calendar. */
  private static void appendSortableDate(StringBuilder sb, Calendar calendar) {
    sb.append(calendar.get(Calendar.YEAR));
    appendTwoDigits(sb, calendar.get(Calendar.MONTH) + 1);
    appendTwoDigits(sb, calendar.get(Calendar.DATE));
  }

  /**
   * Formats a day count as {@code yyyyMMdd}.
   *
   * @param days the day count; multiplied by {@code DateUtilities.day} to obtain
   *     milliseconds
   * @return the date as a sortable string
   */
  public static String daysToSortableDateString(long days) {
    StringBuilder sb = new StringBuilder(8);
    appendSortableDate(sb, utcCalendarForDays(days));
    return sb.toString();
  }

  /**
   * Returns the calendar year of a day count.
   *
   * @param days the day count; multiplied by {@code DateUtilities.day} to obtain
   *     milliseconds
   * @return the year as a decimal string
   */
  public static String daysToCalendarYear(long days) {
    return Integer.toString(utcCalendarForDays(days).get(Calendar.YEAR));
  }

  /**
   * Returns the calendar month (1-12) of a day count.
   *
   * @param days the day count; multiplied by {@code DateUtilities.day} to obtain
   *     milliseconds
   * @return the month as a decimal string without padding
   */
  public static String daysToCalendarMonth(long days) {
    return Integer.toString(utcCalendarForDays(days).get(Calendar.MONTH) + 1);
  }

  /**
   * Returns the quarter (1-4) of the year of a day count.
   *
   * @param days the day count; multiplied by {@code DateUtilities.day} to obtain
   *     milliseconds
   * @return the quarter as a decimal string
   */
  public static String daysToCalendarQuarterYear(long days) {
    return Integer.toString(1 + (utcCalendarForDays(days).get(Calendar.MONTH) / 3));
  }

  /**
   * Formats a millisecond timestamp as {@code yyyyMMddHHmmss} using the 24-hour clock,
   * so that the result sorts chronologically and can be read back by
   * {@link #sortableTimeStringToMS(String)}.
   *
   * @param ms the timestamp in milliseconds
   * @return the timestamp as a sortable string
   */
  public static String millisecondsToSortableTimeString(long ms) {
    Calendar calendar = newUtcCalendar();
    calendar.setTimeInMillis(ms);
    StringBuilder sb = new StringBuilder(14);
    appendSortableDate(sb, calendar);
    // HOUR_OF_DAY (0-23); Calendar.HOUR is the 12-hour field and would map 13:00 to 01.
    appendTwoDigits(sb, calendar.get(Calendar.HOUR_OF_DAY));
    appendTwoDigits(sb, calendar.get(Calendar.MINUTE));
    appendTwoDigits(sb, calendar.get(Calendar.SECOND));
    return sb.toString();
  }

  /**
   * Parses {@code yyyyMMdd}, optionally followed by {@code HH}, {@code HHmm} or
   * {@code HHmmss}, into milliseconds.
   *
   * @throws DataFormatException if the text is null, too short or not numeric
   */
  private static long parseSortableTimeString(String string) throws DataFormatException {
    try {
      int year = Integer.parseInt(string.substring(0, 4));
      int month = Integer.parseInt(string.substring(4, 6)) - 1;
      int day = Integer.parseInt(string.substring(6, 8));
      // The calendar is cleared, so the time of day starts at midnight.
      Calendar calendar = newUtcCalendar();
      calendar.set(year, month, day);
      long time = calendar.getTimeInMillis();
      int length = string.length();
      if (length > 8) {
        time += Integer.parseInt(string.substring(8, 10)) * 60L * 60L * 1000L;
        if (length > 10) {
          time += Integer.parseInt(string.substring(10, 12)) * 60L * 1000L;
          if (length > 12) {
            time += Integer.parseInt(string.substring(12, 14)) * 1000L;
          }
        }
      }
      return time;
    } catch (RuntimeException e) {
      DataFormatException failure =
          new DataFormatException("Error parsing date: \"" + string + "\"");
      failure.initCause(e);
      throw failure;
    }
  }

  /**
   * Converts a sortable time string ({@code yyyyMMdd} with optional {@code HH},
   * {@code HHmm} or {@code HHmmss}) to a day count.
   *
   * @param string the sortable time string
   * @return the day count
   * @throws DataFormatException if the string cannot be parsed
   */
  public static long sortableTimeStringToDays(String string) throws DataFormatException {
    long time = parseSortableTimeString(string);
    // Millenium is added because for negative numbers, integer division truncates upwards! (-8/10 = 0)
    return ((DateUtilities.millenium + time) / DateUtilities.day) - (1000 * 365);
  }

  /**
   * Converts a sortable time string ({@code yyyyMMdd} with optional {@code HH},
   * {@code HHmm} or {@code HHmmss}) to milliseconds.
   *
   * @param string the sortable time string
   * @return the timestamp in milliseconds
   * @throws DataFormatException if the string cannot be parsed
   */
  public static long sortableTimeStringToMS(String string) throws DataFormatException {
    return parseSortableTimeString(string);
  }

  /**
   * Replaces a fixed set of accented Latin-1 characters by their unaccented ASCII
   * letter. The set covers code points 216, 224-229, 231-235, 237-239, 241-246, 248,
   * 250, 252 and 253; every other character is left as it is.
   *
   * @param string the text to normalise
   * @return the text with the listed characters replaced
   */
  public static String replaceInternationalChars(String string) {
    char[] result = string.toCharArray();
    for (int i = 0; i < result.length; i++) {
      switch (result[i]) {
        case 216:
          result[i] = 'O';
          break;
        case 242: case 243: case 244: case 245: case 246: case 248:
          result[i] = 'o';
          break;
        case 237: case 238: case 239:
          result[i] = 'i';
          break;
        case 232: case 233: case 234: case 235:
          result[i] = 'e';
          break;
        case 231:
          result[i] = 'c';
          break;
        case 224: case 225: case 226: case 227: case 228: case 229:
          result[i] = 'a';
          break;
        case 250: case 252:
          result[i] = 'u';
          break;
        case 253:
          result[i] = 'y';
          break;
        case 241:
          result[i] = 'n';
          break;
        default:
          break;
      }
    }
    return new String(result);
  }

  /**
   * Finds the first list element whose lower-cased form equals the lower-cased value.
   *
   * @param value the text to look for
   * @param list the candidates
   * @return the index of the first match, or -1 when there is none
   */
  public static int caseInsensitiveIndexOf(String value, List<String> list) {
    String queryLC = value.toLowerCase();
    for (int i = 0; i < list.size(); i++) {
      if (list.get(i).toLowerCase().equals(queryLC)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Computes the Levenshtein edit distance (insertions, deletions and substitutions, each
   * at cost 1) between two strings.
   *
   * @param s the first string
   * @param t the second string
   * @return the edit distance
   */
  public static int levenshteinDistance(String s, String t) {
    int n = s.length();
    int m = t.length();
    if (n == 0) {
      return m;
    }
    if (m == 0) {
      return n;
    }
    // Only the previous and the current row of the distance table are needed.
    int[] previous = new int[m + 1];
    int[] current = new int[m + 1];
    for (int j = 0; j <= m; j++) {
      previous[j] = j;
    }
    for (int i = 1; i <= n; i++) {
      char sChar = s.charAt(i - 1);
      current[0] = i;
      for (int j = 1; j <= m; j++) {
        int cost = (sChar == t.charAt(j - 1)) ? 0 : 1;
        current[j] = Math.min(
            Math.min(previous[j] + 1, current[j - 1] + 1),
            previous[j - 1] + cost);
      }
      int[] swap = previous;
      previous = current;
      current = swap;
    }
    return previous[m];
  }

  /**
   * Hashes the UTF-8 bytes of the string with the named algorithm and renders the hash
   * as upper-case hexadecimal.
   */
  private static String hexDigest(String algorithm, String str) {
    MessageDigest digest;
    try {
      digest = MessageDigest.getInstance(algorithm);
    } catch (NoSuchAlgorithmException e) {
      throw new IllegalStateException("Digest algorithm not available: " + algorithm, e);
    }
    byte[] hash = digest.digest(str.getBytes(DIGEST_CHARSET));
    StringBuilder hex = new StringBuilder(hash.length * 2);
    for (int i = 0; i < hash.length; i++) {
      int value = hash[i] & 0xff;
      hex.append(HEX_DIGITS[value >>> 4]).append(HEX_DIGITS[value & 0x0f]);
    }
    return hex.toString();
  }

  /**
   * Get hex string interpretation of the 16-byte MD5 hash for an input string. The
   * string is hashed as UTF-8, so the result does not depend on the platform encoding.
   * Author: Kristina
   *
   * @param str the text to hash
   * @return 32 upper-case hexadecimal characters
   * @throws IllegalStateException if the runtime provides no MD5 implementation
   */
  public static String getMD5Digest(String str) {
    return hexDigest("MD5", str);
  }

  /**
   * Get hex string interpretation of the 32-byte SHA-256 hash for an input string. The
   * string is hashed as UTF-8, so the result does not depend on the platform encoding.
   * Author: Kristina
   *
   * @param str the text to hash
   * @return 64 upper-case hexadecimal characters
   * @throws IllegalStateException if the runtime provides no SHA-256 implementation
   */
  public static String getSHA256Digest(String str) {
    return hexDigest("SHA-256", str);
  }

  /**
   * Wraps text to lines of at most {@code lineLength} characters. Lines are broken at
   * the last space, tab or hyphen that keeps the line within the limit (a hyphen moves
   * to the start of the next line); when a line has no such break point it is cut at
   * {@code lineLength}. A newline that is already present within the first
   * {@code lineLength} characters of the remaining text ends the line and is written as
   * a blank line. Surrounding whitespace of every line is trimmed.
   *
   * @param text the text to wrap
   * @param lineLength the maximum line length; must be positive
   * @return the wrapped text
   * @throws IllegalArgumentException if {@code lineLength} is not positive
   */
  public static String wordWrap(String text, int lineLength) {
    if (lineLength <= 0) {
      throw new IllegalArgumentException("lineLength must be positive: " + lineLength);
    }
    StringBuilder wrapped = new StringBuilder();
    String rest = text.trim();
    while (rest.length() > lineLength) {
      int newline = rest.indexOf('\n');
      if (newline != -1 && newline < lineLength) {
        wrapped.append(rest.substring(0, newline).trim()).append("\n\n");
        rest = rest.substring(newline + 1).trim();
        continue;
      }
      int place = Math.max(
          Math.max(rest.lastIndexOf(' ', lineLength), rest.lastIndexOf('\t', lineLength)),
          rest.lastIndexOf('-', lineLength));
      if (place <= 0) {
        // No usable break point (none at all, or only a leading hyphen): cut the line
        // at the limit so that every pass consumes text.
        place = lineLength;
      }
      wrapped.append(rest.substring(0, place).trim()).append('\n');
      rest = rest.substring(place).trim();
    }
    return wrapped.append(rest).toString();
  }
}