/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.util.string; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.Arrays; import java.util.List; import java.util.ListIterator; import java.util.StringTokenizer; import java.util.Vector; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; public class StringUtils { // Removes blanks in the beginning and at the end of a string public static String deblank(String str) { StringTokenizer s = new StringTokenizer(str, " ", false); StringBuilder strRet = new StringBuilder(); while (s.hasMoreElements()) strRet.append(s.nextElement()); return strRet.toString(); } /** * Join labels into string * * @param glue * String inserted between the elements as they are joined * @param items * Strings to be joined together * * @return joined String, or the empty string if items has length 0 * @throws NullPointerException * if either glue or items or any of the items is null */ public static String join(String glue, String[] items) { if (glue == null || items == null) { throw new NullPointerException("Null args"); } if (items.length == 0) { return ""; } StringBuilder sb = new StringBuilder(); sb.append(items[0]); for (int i = 1; i < items.length; i++) { sb.append(glue).append(items[i]); } return sb.toString(); } // Converts a String to a float public static float string2float(String str) { return Float.valueOf(str).floatValue(); } // Converts a String to a double public static double string2double(String str) { return Double.valueOf(str).doubleValue(); } // Converts a String to an int public static int string2int(String str) { return Integer.valueOf(str).intValue(); } public static float[] string2float(String[] strs) { float[] values = null; if (strs != null && strs.length > 0) { values = new float[strs.length]; for (int i = 0; i < strs.length; i++) values[i] = string2float(strs[i]); } return values; } public static double[] string2double(String[] strs) { double[] values = null; if (strs != null && strs.length > 0) { values = new double[strs.length]; for (int i = 0; i < strs.length; i++) values[i] = string2double(strs[i]); } return values; } public static int[] string2int(String[] strs) { int[] values = null; if (strs != null && strs.length > 0) { values = new int[strs.length]; for (int i = 0; i < strs.length; i++) values[i] = string2int(strs[i]); } return values; } // Find indices of multiple occurrences of a character in a String public static int[] find(String str, char ch, int stInd, int enInd) { int[] indices = null; int i; int count = 0; if (stInd < 0) stInd = 0; if (stInd > str.length() - 1) stInd = str.length() - 1; if (enInd < stInd) enInd = stInd; if (enInd > str.length() - 1) enInd = str.length() - 1; for (i = stInd; i <= enInd; i++) { if (str.charAt(i) == ch) count++; } if (count > 0) indices = new int[count]; int total = 0; for (i = stInd; i <= enInd; i++) { if (str.charAt(i) == ch && total < count) indices[total++] = i; } return indices; } public static int[] find(String str, char ch, int stInd) { return find(str, ch, stInd, str.length() - 1); } public static int[] find(String str, char ch) { if (str.length() == 0) return null; return find(str, ch, 0, str.length() - 1); } // Check last folder separator character and append it if it does not exist public static String checkLastSlash(String strIn) { String strOut = strIn; char last = strIn.charAt(strIn.length() - 1); if (last != File.separatorChar && last != '\\' && last != '/') strOut = strOut + "/"; return strOut; } public static String removeLastSlash(String strIn) { String strOut = strIn; while (true) { char last = strOut.charAt(strOut.length() - 1); if (last == File.separatorChar || last == '\\' || last == '/') strOut = strOut.substring(0, strOut.length() - 1); else break; } return strOut; } /** * Purge non-breaking spaces from <b>input</b> by replacing them with regular spaces. * * @param input * to purge * @return purged <b>input</b> */ public static String purgeNonBreakingSpaces(String input) { String output = input.replaceAll("\\xA0", " "); return output; } // Check first file extension separator character and add it if it does not exist public static String checkFirstDot(String strIn) { String strOut = strIn; char extensionSeparator = '.'; char first = strIn.charAt(0); if (first != extensionSeparator) strOut = extensionSeparator + strOut; return strOut; } // Default start index is 1 public static String[] indexedNameGenerator(String preName, int numFiles) { return indexedNameGenerator(preName, numFiles, 1); } public static String[] indexedNameGenerator(String preName, int numFiles, int startIndex) { return indexedNameGenerator(preName, numFiles, startIndex, ""); } public static String[] indexedNameGenerator(String preName, int numFiles, int startIndex, String postName) { return indexedNameGenerator(preName, numFiles, startIndex, postName, ".tmp"); } public static String[] indexedNameGenerator(String preName, int numFiles, int startIndex, String postName, String extension) { int numDigits = 0; if (numFiles > 0) numDigits = (int) Math.floor(Math.log10(startIndex + numFiles - 1)); return indexedNameGenerator(preName, numFiles, startIndex, postName, extension, numDigits); } // Generate a list of files in the format: // <preName>startIndex<postName>.extension // <preName>startIndex+1<postName>.extension // <preName>startIndex+2<postName>.extension // ... // The number of required characters for the largest index is computed automatically if numDigits<required number of // characters for the largest index // The minimum value of startIndex is 0 (negative values are converted to zero) public static String[] indexedNameGenerator(String preName, int numFiles, int startIndex, String postName, String extension, int numDigits) { String[] fileList = null; if (numFiles > 0) { if (startIndex < 0) startIndex = 0; int tmpDigits = (int) Math.floor(Math.log10(startIndex + numFiles - 1)); if (tmpDigits > numDigits) numDigits = tmpDigits; fileList = new String[numFiles]; String strNum; for (int i = startIndex; i < startIndex + numFiles; i++) { strNum = String.valueOf(i); // Add sufficient 0´s in the beginning while (strNum.length() < numDigits) strNum = "0" + strNum; // fileList[i - startIndex] = preName + strNum + postName + extension; } } return fileList; } public static String modifyExtension(String strFilename, String desiredExtension) { String strNewname = strFilename; String desiredExtension2 = checkFirstDot(desiredExtension); int lastDotIndex = strNewname.lastIndexOf('.'); strNewname = strNewname.substring(0, lastDotIndex) + desiredExtension2; return strNewname; } /** * * @param strFilename * strFilename * @param isIncludeDot * isIncludeDot * @return strExtension * @deprecated use {@link org.apache.commons.io.FilenameUtils#getExtension(String)} instead */ @Deprecated public static String getFileExtension(String strFilename, boolean isIncludeDot) { int lastDotIndex = strFilename.lastIndexOf('.'); String strExtension = ""; if (lastDotIndex > -1) { if (isIncludeDot) strExtension = strFilename.substring(lastDotIndex, strFilename.length()); else strExtension = strFilename.substring(lastDotIndex + 1, strFilename.length()); } return strExtension; } public static int findInMap(int[][] map, int ind0) { for (int i = 0; i < map.length; i++) { if (map[i][0] == ind0) return map[i][1]; } return -1; } public static int findInMapReverse(int[][] map, int ind1) { for (int i = 0; i < map.length; i++) { if (map[i][1] == ind1) return map[i][0]; } return -1; } public static boolean isNumeric(String str) { for (int i = 0; i < str.length(); i++) { char ch = str.charAt(i); if (!Character.isDigit(ch) && ch != '.') return false; } return true; } // Retrieves filename from fullpathname // Also works for removing file extension from a filename with extension public static String getFileName(String fullpathFilename, boolean bRemoveExtension) { String filename = ""; int ind1 = fullpathFilename.lastIndexOf('\\'); int ind2 = fullpathFilename.lastIndexOf('/'); ind1 = Math.max(ind1, ind2); if (ind1 < 0) ind1 = -1; if (ind1 < fullpathFilename.length() - 2) filename = fullpathFilename.substring(ind1 + 1); if (bRemoveExtension) { ind1 = filename.lastIndexOf('.'); if (ind1 >= 1) filename = filename.substring(0, ind1); } return filename; } public static String getFileName(String fullpathFilename) { return getFileName(fullpathFilename, true); } /** * * @param fullpathFilename * fullpathFilename * @return foldername * @deprecated use {@link org.apache.commons.io.FilenameUtils#getFullPath(String)} instead */ @Deprecated public static String getFolderName(String fullpathFilename) { String foldername = ""; int ind1 = fullpathFilename.lastIndexOf('\\'); int ind2 = fullpathFilename.lastIndexOf('/'); ind1 = Math.max(ind1, ind2); if (ind1 >= 0 && ind1 < fullpathFilename.length() - 2) foldername = fullpathFilename.substring(0, ind1 + 1); return foldername; } // Reads all rows as one String public static String[] readTextFile(String textFile) throws IOException { return readTextFile(textFile, "ASCII"); } public static String[] readTextFile(String textFile, String encoding) throws IOException { String[][] tmp = readTextFileInRows(textFile, encoding, 1); String[] strRet = new String[tmp.length]; for (int i = 0; i < tmp.length; i++) strRet[i] = tmp[i][0]; return strRet; } public static String[][] readTextFileInRows(String textFile, String encoding, int minimumItemsInOneLine) throws IOException { String[][] entries = null; String allText = FileUtils.readFileToString(new File(textFile), encoding); if (allText != null) { String[] lines = allText.split("\n"); entries = parseFromLines(lines, minimumItemsInOneLine, 0, lines.length - 1); } return entries; } public static String[][] parseFromLines(String[] lines, int minimumItemsInOneLine, int startLine, int endLine) { String[][] labels = null; String[][] labelsRet = null; if (startLine <= endLine) { int i, j; int count = 0; for (i = startLine; i <= endLine; i++) { String[] labelInfos = null; if (minimumItemsInOneLine > 1) { labelInfos = lines[i].split(" "); } else { labelInfos = new String[1]; labelInfos[0] = lines[i]; } boolean isNotEmpty = false; for (j = 0; j < labelInfos.length; j++) { labelInfos[j] = labelInfos[j].trim(); if (labelInfos[j].length() != 0) isNotEmpty = true; } if (labelInfos.length > 0 && isNotEmpty) count++; } int tmpCount = 0; if (count > 0) { labels = new String[count][]; for (i = startLine; i <= endLine; i++) { if (tmpCount > count - 1) break; String[] labelInfos = null; if (minimumItemsInOneLine > 1) { labelInfos = lines[i].split(" "); } else { labelInfos = new String[1]; labelInfos[0] = lines[i]; } boolean isNotEmpty = false; for (j = 0; j < labelInfos.length; j++) { labelInfos[j] = labelInfos[j].trim(); if (labelInfos[j].length() != 0) isNotEmpty = true; } if (labelInfos.length > 0 && isNotEmpty) { labels[tmpCount] = new String[minimumItemsInOneLine]; for (j = 0; j < Math.min(labelInfos.length, minimumItemsInOneLine); j++) labels[tmpCount][j] = labelInfos[j].trim(); tmpCount++; } } labelsRet = new String[tmpCount][]; for (i = 0; i < tmpCount; i++) { labelsRet[i] = new String[minimumItemsInOneLine]; for (j = 0; j < minimumItemsInOneLine; j++) labelsRet[i][j] = labels[i][j]; } } } return labelsRet; } public static int[] getDifferentItemsList(int[] items) { int[] differentItems = null; int[] indices = getDifferentItemsIndices(items); if (indices != null) { differentItems = new int[indices.length]; for (int i = 0; i < indices.length; i++) differentItems[i] = items[indices[i]]; } return differentItems; } public static int[] getDifferentItemsIndices(int[] items) { String[] strItems = new String[items.length]; for (int i = 0; i < items.length; i++) strItems[i] = String.valueOf(items[i]); return getDifferentItemsIndices(strItems); } public static String[] getDifferentItemsList(String[] items) { String[] differentItems = null; int[] indices = getDifferentItemsIndices(items); if (indices != null) { differentItems = new String[indices.length]; for (int i = 0; i < indices.length; i++) differentItems[i] = items[indices[i]]; } return differentItems; } public static int[] getDifferentItemsIndices(String[] items) { int[] differentItemIndices = null; if (items != null) { int[] tmpDifferentItemIndices = new int[items.length]; int differentCount = 1; int i, j; tmpDifferentItemIndices[0] = 0; boolean bDifferent; for (i = 1; i < items.length; i++) { bDifferent = true; for (j = 0; j < differentCount; j++) { if (items[i].compareTo(items[tmpDifferentItemIndices[j]]) == 0) { bDifferent = false; break; } } if (bDifferent) { tmpDifferentItemIndices[differentCount] = i; differentCount++; if (differentCount >= items.length) break; } } differentItemIndices = new int[differentCount]; System.arraycopy(tmpDifferentItemIndices, 0, differentItemIndices, 0, differentCount); } return differentItemIndices; } public static boolean isDesired(int currentFeature, int desiredFeatures) { return isDesired(currentFeature, desiredFeatures, 0); } public static boolean isDesired(int currentFeature, int desiredFeatures, int maxFeatureStringLen) { boolean bRet; String str1 = Integer.toBinaryString(desiredFeatures); String str2 = Integer.toBinaryString(currentFeature); if (maxFeatureStringLen < str1.length()) maxFeatureStringLen = str1.length(); if (maxFeatureStringLen < str2.length()) maxFeatureStringLen = str2.length(); while (str1.length() < maxFeatureStringLen) str1 = "0" + str1; while (str2.length() < maxFeatureStringLen) str2 = "0" + str2; bRet = false; for (int i = 0; i < str1.length(); i++) { if (Integer.valueOf(String.valueOf(str1.charAt(i))) == 1 && Integer.valueOf(String.valueOf(str2.charAt(i))) == 1) { bRet = true; break; } } return bRet; } public static String getRandomName(int randomNameLength) { return getRandomName(null, randomNameLength); } public static String getRandomName(String preName, int randomNameLength) { return getRandomName(preName, randomNameLength, null); } public static String getRandomName(String preName, int randomNameLength, String postName) { String randomName = ""; while (randomName.length() < randomNameLength) randomName += String.valueOf((int) (10 * Math.random())); if (preName != null) randomName = preName + randomName; if (postName != null) randomName += postName; return randomName; } public static String getRandomFileName(String preName, int randomNameLength, String fileExtension) { if (fileExtension.charAt(0) != '.') fileExtension = "." + fileExtension; return getRandomName(preName, randomNameLength, fileExtension); } public static boolean isOneOf(String item, String[] list) { boolean isFound = false; for (int i = 0; i < list.length; i++) { if (item.compareTo(list[i]) == 0) { isFound = true; break; } } return isFound; } /** * @param allInOneLine * allInOneLine * @return result.toArray(new String[0]) * @deprecated Unstable due to platform-specific behavior. Use {@link org.apache.commons.lang.StringUtils#split} or similar * instead. */ @Deprecated public static String[] toStringArray(String allInOneLine) { if (allInOneLine != "") { Vector<String> result = new Vector<String>(); StringTokenizer s = new StringTokenizer(allInOneLine, System.getProperty("line.separator")); String line = null; // Read until either end of file or an empty line while (s.hasMoreTokens() && ((line = s.nextToken()) != null) && (!line.equals(""))) result.add(line); return result.toArray(new String[0]); } else return null; } public static InputStream toInputStream(String str) { ByteArrayInputStream stream = null; try { stream = new ByteArrayInputStream(str.getBytes("UTF-8")); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } return stream; } public static InputStream toInputStream(String[] stringArray) { return toInputStream(stringArray, 0); } public static InputStream toInputStream(String[] stringArray, int startIndex) { return toInputStream(stringArray, startIndex, stringArray.length); } public static InputStream toInputStream(String[] stringArray, int startIndex, int endIndex) { String str = toString(stringArray, startIndex, endIndex); return toInputStream(str); } /** * Combine the elements of the given string array into a single string, containing one array element per line. * * @param stringArray * stringArray * @return toString(stringArray, 0) */ public static String toString(String[] stringArray) { return toString(stringArray, 0); } /** * Combine the elements of the given string array into a single string, containing one array element per line. * * @param stringArray * stringArray * @param startIndex * startIndex * @return toString(stringArray, startIndex, stringArray.length - 1) */ public static String toString(String[] stringArray, int startIndex) { return toString(stringArray, startIndex, stringArray.length - 1); } /** * Combine the elements of the given string array into a single string, containing one array element per line. * * @param stringArray * stringArray * @param startIndex * startIndex * @param endIndex * endIndex * @return str converted to string */ public static String toString(String[] stringArray, int startIndex, int endIndex) { if (startIndex < 0) startIndex = 0; if (startIndex > stringArray.length - 1) startIndex = stringArray.length - 1; if (endIndex < startIndex) endIndex = startIndex; if (endIndex > stringArray.length - 1) endIndex = stringArray.length - 1; StringBuilder str = new StringBuilder(); for (int i = startIndex; i <= endIndex; i++) { str.append(stringArray[i]).append(System.getProperty("line.separator")); } return str.toString(); } public static String replace(String str, String pattern, String replacement) { int s = 0; int e = 0; StringBuilder result = new StringBuilder(); while ((e = str.indexOf(pattern, s)) >= 0) { result.append(str.substring(s, e)); result.append(replacement); s = e + pattern.length(); } result.append(str.substring(s)); return result.toString(); } public static String urlEncode(String strRequest) { String encoded = strRequest; try { encoded = URLEncoder.encode(encoded, "UTF-8"); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } return encoded; } public static String urlDecode(String strRequest) { // decoded = StringUtils.replace(strRequest, "%20", " "); String decoded = strRequest; try { decoded = URLDecoder.decode(decoded, "UTF-8"); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } // decoded = StringUtils.replace(decoded, "_HTTPREQUESTLINEBREAK_", System.getProperty("line.separator")); return decoded; } /** * Divides the example text of a voice into sentences in a vector * * @param text * the example text * @return vector of example sentences */ public static Vector<String> processVoiceExampleText(String text) { StringTokenizer st = new StringTokenizer(text, "#"); Vector<String> sentences = null; while (st.hasMoreTokens()) { if (sentences == null) sentences = new Vector<String>(); sentences.add(st.nextToken()); } return sentences; } public static String toString(double[][] array) { String str = ""; int i, j; for (i = 0; i < array.length; i++) { for (j = 0; j < array[i].length; j++) { str += String.valueOf(array[i][j]); if (j < array[i].length - 1) str += " "; } str += System.getProperty("line.separator"); } str += System.getProperty("line.separator"); return str; } /** * Determine whether the given codepoint is either a letter or a modifier according to the Unicode standard. More precisely, * this returns true if codepoint belongs to one of the following categories as defined at * http://unicode.org/Public/UNIDATA/UCD.html#General_Category_Values: * <ul> * <li>Lu Letter, Uppercase</li> * <li>Ll Letter, Lowercase</li> * <li>Lt Letter, Titlecase</li> * <li>Lm Letter, Modifier</li> * <li>Lo Letter, Other</li> * <li>Mn Mark, Nonspacing</li> * <li>Mc Mark, Spacing Combining</li> * <li>Me Mark, Enclosing</li> * </ul> * Whether a given character is associated with this category can be looked up at * http://unicode.org/Public/UNIDATA/UnicodeData.txt * * @param codePoint * the unicode codepoint as determined e.g. by String.codePointAt(). * @return true if the above condition is met, false otherwise */ public static boolean isLetterOrModifier(int codePoint) { int type = Character.getType(codePoint); return type == Character.UPPERCASE_LETTER || type == Character.LOWERCASE_LETTER || type == Character.TITLECASE_LETTER || type == Character.MODIFIER_LETTER || type == Character.OTHER_LETTER || type == Character.NON_SPACING_MARK || type == Character.COMBINING_SPACING_MARK || type == Character.ENCLOSING_MARK; } public static String[] toStringLines(double[] x) { String[] y = null; if (x != null && x.length > 0) { y = new String[x.length]; for (int i = 0; i < x.length; i++) y[i] = String.valueOf(x[i]); } return y; } public static String[] toStringLines(float[] x) { String[] y = null; if (x != null && x.length > 0) { y = new String[x.length]; for (int i = 0; i < x.length; i++) y[i] = String.valueOf(x[i]); } return y; } public static String[] toStringLines(int[] x) { String[] y = null; if (x != null && x.length > 0) { y = new String[x.length]; for (int i = 0; i < x.length; i++) y[i] = String.valueOf(x[i]); } return y; } /** * Parse a string containing pairs of integers in brackets, and return as one array of integers. This will ignore any string * content that does not match the bracket pattern. * * @param attribute * - the string containing the bracketed expression. For example, 'f0' attribute of 'ph' element in MaryXML. * Expected format: "(5,248)(47,258)(100,433)" * @return an int array with an even number of elements, such that the i'th pair can be accessed as array[2*i] and * array[2*i+1], or an empty array if no bracket expressions are found. * @throws NullPointerException * if attribute is null. */ public static int[] parseIntPairs(String attribute) { if (attribute == null) { throw new NullPointerException("Received null argument"); } Pattern p = Pattern.compile("(\\d+,\\d+)"); int[] temp = new int[attribute.length() / 2]; // will definitely be more than long enough // Split input with the pattern Matcher m = p.matcher(attribute); int i = 0; // count pairs while (m.find()) { String[] f0Values = (m.group().trim()).split(","); temp[2 * i] = Integer.parseInt(f0Values[0]); temp[2 * i + 1] = Integer.parseInt(f0Values[1]); i++; } int[] result = new int[2 * i]; System.arraycopy(temp, 0, result, 0, result.length); return result; } public static void main(String[] args) throws Exception { String[] items1 = readTextFile("D:\\items.txt", "ASCII"); int[] inds1 = StringUtils.getDifferentItemsIndices(items1); String[] diffItems1 = StringUtils.getDifferentItemsList(items1); int[] items2 = { 1, 2, 3, 4, 1, 1, 2, 2, 4, 4, 10 }; int[] inds2 = StringUtils.getDifferentItemsIndices(items2); int[] diffItems2 = StringUtils.getDifferentItemsList(items2); System.out.println("Test completed...."); } }