/******************************************************************************* * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com) * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt ******************************************************************************/ package com.opendoorlogistics.core.utils.strings; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.InputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.Charsets; import org.apache.commons.io.IOUtils; import com.opendoorlogistics.core.utils.Numbers; final public class Strings { private static final HashSet<Character> unicodeSpeechMarks; private static Pattern nonAlphaNumericOrWhitespace = Pattern.compile("[^a-zA-Z\\d\\s\\-]"); static { unicodeSpeechMarks = new HashSet<>(); for (char c : new char[] { '\u0022', '\u201C', '\u201D', '\u201E', '\u201F', '\u301D', '\u301E', '\u301F', '\uFF02', '\u2033', '\u2036' }) { unicodeSpeechMarks.add(c); } } private Strings() { } public interface DoesStringExist { boolean isExisting(String s); } /** * Check if the string s is already used and if so, add a number to the end of it (1, 2, 3...) to make it unique * * @param s * @return */ public static String makeUnique(String s, DoesStringExist cb) { long l = 0; while (l < Long.MAX_VALUE) { String ret = s + (l == 0 ? "" : Long.toString(l)); if (cb.isExisting(ret) == false) { return ret; } l++; } return null; } public static String standardiseSpeechMarks(String s) { int n = s.length(); StringBuilder builder = new StringBuilder(n); for (int i = 0; i < n; i++) { char c = s.charAt(i); if (unicodeSpeechMarks.contains(c)) { builder.append("\""); } else { builder.append(c); } } return builder.toString(); } public static String toCommas(String... strs) { return toString(",", strs); } public static String toCommas(Collection<String> strs) { return toCommas(strs.toArray(new String[strs.size()])); } public static String toFirstLetterInWordCapitalised(String s) { int n = s.length(); StringBuilder builder = new StringBuilder(); boolean lastIsSpace = true; for (int i = 0; i < n; i++) { char c = s.charAt(i); if (lastIsSpace) { builder.append(Character.toUpperCase(c)); } else { builder.append(Character.toLowerCase(c)); } lastIsSpace = Character.isWhitespace(c); } return builder.toString(); } public static interface ToString<T> { String toString(T o); } public static <T> String toCommas(ToString<T> toString, Collection<T> objs) { return toString(toString, ",", objs); } public static <T> String toString(ToString<T> toString, String separator, Collection<T> objs) { int count = 0; StringBuilder builder = new StringBuilder(); for (T obj : objs) { if (count > 0) { builder.append(separator); } builder.append(toString.toString(obj)); } return builder.toString(); } public static String toString(String separator, Collection<String> strs) { return toString(separator, strs.toArray(new String[strs.size()])); } /** * Does the collection contain the input string when both are standardised? * * @param find * @param findIn * @return */ public static boolean containsStandardised(String find, Iterable<String> findIn) { for (String test : findIn) { if (equalsStd(find, test)) { return true; } } return false; } public static String toString(String separator, String... strs) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < strs.length; i++) { builder.append(strs[i]); if (i != strs.length - 1) { builder.append(separator); } } return builder.toString(); } public static <T> String[] toStringArray(T[] objs) { String[] ret = new String[objs.length]; for (int i = 0; i < objs.length; i++) { ret[i] = objs[i].toString(); } return ret; } /** * See http://stackoverflow.com/questions/1155107/is-there-a-cross-platform-java-method-to-remove-filename-special-chars * * @param s * @return */ public static String toFileSafeString(String s) { return s.replaceAll("[^a-zA-Z0-9\\._]+", "_"); } public static String[] toLowerCaseArray(String[] arr) { String[] ret = new String[arr.length]; for (int i = 0; i < arr.length; i++) { ret[i] = arr[i].toLowerCase(); } return ret; } public static TreeSet<String> stdTreeSet(Iterable<String> iterable) { TreeSet<String> ret = new TreeSet<>(); if (iterable != null) { for (String s : iterable) { if (s != null) { String std = std(s); if (std.length() > 0) { ret.add(std(s)); } } } } return ret; } // /** // * Standardised version of a string value // * // * @param s // * @return // */ // public static String std(String s) { // if (s == null) { // return ""; // } // // // trim whitespace at start and end and convert to lowercase // s = s.trim().toLowerCase(); // // // ensure only have single spaces // s = s.replaceAll(" ", " "); // return s; // } /** * Standardised version of a string value. * Calculation is optimised as much as possible. * @param s * @return */ public static String std(String s){ if (s == null) { return ""; } int n = s.length(); StringBuilder b = new StringBuilder(n); // find first non-whitespace int firstNonWS=n; for(int i = 0 ; i < n ; i++){ char c = s.charAt(i); if(!Character.isWhitespace(c)){ firstNonWS = i; break; } } // get last non-whitespace char int lastNonWS=-1; for(int i = n-1; i>=0 ; i--){ char c = s.charAt(i); if(!Character.isWhitespace(c)){ lastNonWS = i; break; } } boolean inWhiteSpace=false; char c; for(int i =firstNonWS ; i <= lastNonWS ; i++){ c = Character.toLowerCase(s.charAt(i)); if(inWhiteSpace){ if(Character.isWhitespace(c)){ // never add two whitespaces in a row }else{ // no longer in whitespace inWhiteSpace = false; b.append(c); } }else{ if(Character.isWhitespace(c)){ // always treat whitespace as a space b.append(' '); inWhiteSpace = true; }else{ b.append(c); } } } return b.toString(); } /** * See http://stackoverflow.com/questions/5054995/how-to-replace-case-insensitive-literal-substrings-in-java * * @param source * @param target * @param replacement * @return */ public static String caseInsensitiveReplace(String source, String target, String replacement) { StringBuilder sbSource = new StringBuilder(source); StringBuilder sbSourceLower = new StringBuilder(source.toLowerCase()); String searchString = target.toLowerCase(); int idx = 0; while ((idx = sbSourceLower.indexOf(searchString, idx)) != -1) { sbSource.replace(idx, idx + searchString.length(), replacement); sbSourceLower.replace(idx, idx + searchString.length(), replacement); idx += replacement.length(); } sbSourceLower.setLength(0); sbSourceLower.trimToSize(); sbSourceLower = null; return sbSource.toString(); } public static String repeat(String s, int nTimes){ StringBuilder b = new StringBuilder(); for(int i =0 ; i < nTimes ; i++){ b.append(s); } return b.toString(); } /** * Find the index of the string in the array or return -1 if not found. An exact match is favoured over a standardised match. * * @param find * @param strs * @return */ public static int indexOfStd(String find, String[] strs) { for (int i = 0; i < strs.length; i++) { if (find.equals(strs[i])) { return i; } } for (int i = 0; i < strs.length; i++) { if (equalsStd(find, strs[i])) { return i; } } return -1; } /** * Case-sensitive equals which will return true if both a and b are null * * @param a * @param b * @return */ public static boolean equals(String a, String b) { if ((a == null) != (b == null)) { return false; } if (a == null) { // b must also be null return true; } return a.equals(b); } public static boolean equalsStd(String a, String b) { return equalsStd(a, b, null); } public static boolean equalsStd(String a, String b, StandardisedCache standardisedCache) { if (isEmpty(a) && isEmpty(b)) { return true; } if (a == null || b == null) { return false; } if(standardisedCache!=null){ return standardisedCache.std(a).equals(standardisedCache.std(b)); }else{ return std(a).equals(std(b)); } } private static class StdStringComparer { final private String notNumberGroup1 = "([^0-9]*)"; final private String notNumberGroup2 = "([^0-9]*.*)"; final private String number = "(\\d+)"; final private Pattern pattern = Pattern.compile(notNumberGroup1+ "\\s*" + number + "\\s*" + notNumberGroup2 + "(.*)"); final private static StdStringComparer singleton = new StdStringComparer(); private class StringComponents implements Comparable<StringComponents> { String original; String nonNumber; Long number; String secondNonNumber; public StringComponents(String s) { original = s; s = Strings.std(s); nonNumber = s; number = Long.MIN_VALUE; secondNonNumber = ""; Matcher matcher = pattern.matcher(s); if (matcher.matches()) { Long l = Numbers.toLong(matcher.group(2)); if (l != null) { nonNumber = Strings.std(matcher.group(1)); number = l; secondNonNumber = Strings.std(matcher.group(3)); } } } @Override public int compareTo(StringComponents o) { int diff = nonNumber.compareTo(o.nonNumber); if (diff == 0) { diff = Long.compare(number, o.number); } if (diff == 0) { diff = secondNonNumber.compareTo(o.secondNonNumber); } return diff; } @Override public String toString() { return original; } } private StdStringComparer() { } public int compare(String a, String b) { int diff = Boolean.compare(isEmpty(a), isEmpty(b)); if (diff == 0 && a == null) { // must both be empty... return 0; } StringComponents ca = new StringComponents(a); StringComponents cb = new StringComponents(b); diff = ca.compareTo(cb); return diff; } public static StdStringComparer singleton() { return singleton; } } public static String[] addArrays(String[] arr1, String ...arr2) { String [] ret = new String[arr1.length + arr2.length]; for(int i = 0 ; i < arr1.length ; i++){ ret[i] = arr1[i]; } for(int i = 0 ; i < arr2.length ;i++){ ret[i + arr1.length] = arr2[i]; } return ret; } public static String[] addToArray(String[] arr, String s) { String[] ret = new String[arr.length + 1]; System.arraycopy(arr, 0, ret, 0, arr.length); ret[arr.length] = s; return ret; } public static String[] addArrayToStr(String s,String[] arr) { String[] ret = new String[arr.length + 1]; ret[0]=s; System.arraycopy(arr, 0, ret, 1, arr.length); ret[arr.length] = s; return ret; } /** * Standardised comparison of two strings. The comparison compares the standardised version of the two strings. * * @param a * @param b * @param useNumberSortationLogic If true, the method handles the situation where you have a word followed by a number, e.g. "vehicle 9", "vehicle 11", * and applies numeric sorting to the number part. * @return */ public static int compareStd(String a, String b, boolean useNumberSortationLogic) { if(useNumberSortationLogic){ return StdStringComparer.singleton().compare(a, b); }else{ return std(a).compareTo(std(b)); } } public static boolean isTrue(String s) { if (s == null) { return false; } s = std(s); return s.equals("1") || s.equals("t") || s.equals("true") || s.equals("y") || s.equals("yes"); } public static boolean hasWhiteSpace(String s) { return hasWhitespace.matcher(s).matches(); } private final static Pattern hasWhitespace = Pattern.compile("\\S*\\s+.*"); private final static Pattern isNumber = Pattern.compile("\\s*-?\\d+\\.?\\d*\\s*"); private final static Pattern isIntNumber = Pattern.compile("\\s*-?\\d+\\s*$"); /** * Tests if the input string is an integer or decimal number. Leading and trailing whitespace is ignored. * * @param s * @return */ public static boolean isNumber(String s) { if (Strings.isEmpty(s)) { return false; } return isNumber.matcher(s).matches(); } private final static Pattern isEmail = Pattern.compile("^[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})$"); /** * Validates an email using the regular expression from * http://examples.javacodegeeks.com/core-java/util/regex/matcher/validate-email-address-with-java-regular-expression-example/ * * @param s * @return */ public static boolean isEmailAddress(String s) { return isEmail.matcher(s).matches(); } public static boolean isIntNumber(String s) { if (Strings.isEmpty(s)) { return false; } return isIntNumber.matcher(s).matches(); } public static boolean isEnclosedBySpeechMarks(String s) { return s.length() >= 2 && s.charAt(0) == '\"' && s.charAt(s.length() - 1) == '\"'; } /** * Parse integer without throwing exception * * @param s * @return */ public static Long parseLong(String s) { if (isIntNumber(s)) { try { return Long.parseLong(s); } catch (Throwable e) { // TODO: handle exception } } return null; } public static String getTabs(int nbTabs) { StringBuilder ret = new StringBuilder(); for (int i = 0; i < nbTabs; i++) { ret.append('\t'); } return ret.toString(); } /** * Adds additional tabs to the start of each line * * @param s * @param nbTabs * @return */ public static String getTabIndented(String s, int nbTabs) { String tabs = getTabs(nbTabs); StringBuilder ret = new StringBuilder(); int len = s.length(); if (len > 0) { ret.append(tabs); } for (int i = 0; i < len; i++) { char c = s.charAt(i); ret.append(c); if (c == '\n' && i < len - 1) { ret.append(tabs); } } return ret.toString(); } public static boolean isEmpty(Object o) { return o == null || isEmpty(o.toString()); } public static boolean isEmptyWhenStandardised(Object o) { return o == null || isEmptyWhenStandardised(o.toString()); } public static boolean isEmpty(String s) { return s == null || s.length() == 0; } public static boolean isEmptyWhenStandardised(String s) { return isEmpty(std(s)); } public static void writeToFile(String s, File file) { try { PrintWriter out = new PrintWriter(file); out.append(s); out.close(); } catch (Throwable e) { throw new RuntimeException(e); } } public static List<String> readFileAsLines(String path) { BufferedReader br = null; ArrayList<String> ret = new ArrayList<>(); try { br = new BufferedReader(new FileReader(path)); String line = br.readLine(); while (line != null) { ret.add(line); line = br.readLine(); } } catch (Throwable e) { throw new RuntimeException(e); } finally { if (br != null) { try { br.close(); } catch (Throwable e2) { throw new RuntimeException(e2); } } } return ret; } public static String readUTF8Resource(String name) { // Use own class loader to prevent problems when jar loaded by reflection InputStream is = Strings.class.getResourceAsStream(name); StringWriter writer = new StringWriter(); try { IOUtils.copy(is, writer, Charsets.UTF_8); is.close(); } catch (Throwable e) { throw new RuntimeException(e); } return writer.toString(); } public static String readFile(String path) { return readFile(path, Charset.defaultCharset()); } public static String readFile(String path, Charset encoding) { try { byte[] encoded = Files.readAllBytes(Paths.get(path)); return encoding.decode(ByteBuffer.wrap(encoded)).toString(); } catch (Throwable e) { throw new RuntimeException(e); } } /** * Converts text to a more-display friendly format. * * @param text * @return */ public static String convertEnumToDisplayFriendly(String text) { text = text.replaceAll("_", " "); text = text.replaceAll(" ", " "); text = text.trim(); text = text.toLowerCase(); if (text.length() > 0) { text = text.substring(0, 1).toUpperCase() + text.substring(1); } return text; } /** * Converts text to a more-display friendly format. * * @param text * @return */ public static String convertEnumToDisplayFriendly(Enum<?> en) { return convertEnumToDisplayFriendly(en.name()); } public static String getStackTrace(Throwable e) { StringWriter writer = new StringWriter(); PrintWriter printWriter = new PrintWriter(writer); e.printStackTrace(printWriter); return writer.toString(); } public static String getFiltered(String s, char... acceptChars) { TreeSet<Character> set = new TreeSet<>(); for (char c : acceptChars) { set.add(c); } StringBuilder ret = new StringBuilder(); for (int i = 0; i < s.length(); i++) { if (set.contains(s.charAt(i))) { ret.append(s.charAt(i)); } } return ret.toString(); } public static String toString(String separator, int... ints) { StringBuilder builder = new StringBuilder(); for (int i = 0; ints!=null && i < ints.length; i++) { if (i > 0) { builder.append(separator); } builder.append(ints[i]); } return builder.toString(); } public static String toCommas(int... ints) { if(ints!=null){ return toString(",", ints); } return ""; } /** * Remove chars from the string which could prove problematic when exporting. Currently this contains chars not allowed in workbook sheet names. * * @param s * @return */ public static String removeExportIllegalChars(String s) { char[] illegals = new char[] { '/', '\\', '?', '*', ']', '[', ':' }; StringBuilder builder = new StringBuilder(); int n = s.length(); for (int i = 0; i < n; i++) { char c = s.charAt(i); boolean found = false; for (char illegal : illegals) { if (illegal == c) { found = true; break; } } if (!found) { builder.append(c); } } return builder.toString(); } public static String getLeftWithoutWordSplitting(String s, int maxNbChars) { int n = s.length(); if (n <= maxNbChars) { return s; } // work out the max char to take int nbInclude = 0; boolean onNonBreak = true; for (int i = 0; i < n; i++) { char c = s.charAt(i); int nbChar = i + 1; if (onNonBreak && (c == ',' || c == ' ')) { if (nbChar <= maxNbChars || nbInclude == 0) { nbInclude = nbChar - 1; } if (nbChar > maxNbChars) { break; } onNonBreak = false; } else { onNonBreak = true; } } if (nbInclude == 0) { return ""; } return s.substring(0, nbInclude); } public static void main(String[] args) { ArrayList<String> list = new ArrayList<>(); for (String s : new String[] { " VEHILCE 9", "vehicle ", " vehicle 01 ", "vehicle 10", "vehicle 23b", "ve HIC le 23", " artic 1" }) { // list.add(s); System.out.println("\"" + s + "\"" + " -> " +"\""+ std(s) + "\""); } // Collections.sort(list, new Comparator<String>() { // // @Override // public int compare(String o1, String o2) { // return compareStd(o1, o2); // } // }); // System.out.println(list); } public static TreeSet<String> toTreeSet(String... strs) { TreeSet<String> ret = new TreeSet<>(); for (String s : strs) { ret.add(s); } return ret; } public static String getExceptionMessagesAsSingleStr(Throwable e) { StringBuilder b = new StringBuilder(); for(String s : getExceptionMessages(e)){ b.append(s); b.append(System.lineSeparator()); } return b.toString(); } /** * Gets the list of all messages from the exception and and the ancestor exception(s) that caused it. The list is returned in chronological order. * * @param e * @return */ public static List<String> getExceptionMessages(Throwable e) { // get reversed list of causes so its chronological ArrayList<Throwable> causes = new ArrayList<>(); while(e!=null){ causes.add(e); e = e.getCause(); } Collections.reverse(causes); // get the list of messages ArrayList<String> messages = new ArrayList<>(); ArrayList<String> ret = new ArrayList<>(); for(Throwable exception : causes){ if (isEmpty(exception.getMessage()) == false) { String msg = exception.getMessage(); // skip if part of the message has already been shown as its likely just the same message with // the exception class name added to the start boolean found = false; for(String shown: messages){ if(shown.length() > 3 && msg.toLowerCase().contains(shown.toLowerCase())){ found = true; break; } } if(!found){ // save to list of shown messages messages.add(msg); // only print the exception type if it gives the user some information if (exception.getClass() != Exception.class && exception.getClass() != RuntimeException.class) { ret.add("Exception of type \"" + exception.getClass().getSimpleName() + "\" : " + msg); } else { ret.add(msg); } } } } return ret; } public static interface LineCB { boolean lineCB(String line); } public static void parseFileAsLines(File file, LineCB cb) { BufferedReader br = null; try { br = new BufferedReader(new FileReader(file)); String line = br.readLine(); while (line != null) { if (!cb.lineCB(line)) { break; } line = br.readLine(); } } catch (Throwable e) { throw new RuntimeException(e); } finally { try { br.close(); } catch (Throwable e) { throw new RuntimeException(e); } } } /** * Returns true if the string is empty, or contains solely alphanumeric characters or whitespaces or - or : * * @param s * @return */ public static boolean isEmptyAlphaNumericWhitespaceOrDash(String s) { return nonAlphaNumericOrWhitespace.matcher(s).find() == false; } /** * Create a copy of the array where the first element is removed. * @param arr * @return */ public static String [] removeFirstElement(String [] arr){ int n = arr.length-1; String [] ret = new String[Math.max(0, n)]; for(int i =0 ; i< n ; i++){ ret[i] = arr[i+1]; } return ret; } }