/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openflamingo.mapreduce.util;
import org.apache.commons.lang.ArrayUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* String Utility.
*
* @author Edward KIM
* @author Seo Ji Hye
* @since 0.1
*/
public class StringUtils {
private static final String EMPTY_STRING = "";
private static final String NULL_STRING = "null";
private static final String ARRAY_START = "{";
private static final String EXPRESSION_START = "#{";
private static final String ARRAY_END = "}";
private static final String EMPTY_ARRAY = ARRAY_START + ARRAY_END;
private static final String ARRAY_ELEMENT_SEPARATOR = ", ";
/**
* @link http://www.webreference.com/js/tips/000126.html
*/
public static String convertTabSpaceSpecialCharacters(String message) {
if (isEmpty(message)) {
return message;
}
if ("\\t".equals(message)) { // Tab
return replace(message, "\\t", "\u0009");
} else if ("\\s".equals(message)) { // Space
return replace(message, "\\s", "\u0020");
}
return message;
}
/**
* Replace all occurences of a substring within a string with another string.
*
* @param inString String to examine
* @param oldPattern String to replace
* @param newPattern String to insert
* @return a String with the replacements
*/
public static String replace(String inString, String oldPattern, String newPattern) {
if (!hasLength(inString) || !hasLength(oldPattern) || newPattern == null) {
return inString;
}
StringBuffer sbuf = new StringBuffer();
// output StringBuffer we'll build up
int pos = 0; // our position in the old string
int index = inString.indexOf(oldPattern);
// the index of an occurrence we've found, or -1
int patLen = oldPattern.length();
while (index >= 0) {
sbuf.append(inString.substring(pos, index));
sbuf.append(newPattern);
pos = index + patLen;
index = inString.indexOf(oldPattern, pos);
}
sbuf.append(inString.substring(pos));
// remember to append any characters to the right of a match
return sbuf.toString();
}
/**
* Check that the given CharSequence is neither <code>null</code> nor of length 0.
* Note: Will return <code>true</code> for a CharSequence that purely consists of whitespace.
* <p><pre>
* StringUtils.hasLength(null) = false
* StringUtils.hasLength("") = false
* StringUtils.hasLength(" ") = true
* StringUtils.hasLength("Hello") = true
* </pre>
*
* @param str the CharSequence to check (may be <code>null</code>)
* @return <code>true</code> if the CharSequence is not null and has length
*/
public static boolean hasLength(CharSequence str) {
return (str != null && str.length() > 0);
}
/**
* Check that the given String is neither <code>null</code> nor of length 0.
* Note: Will return <code>true</code> for a String that purely consists of whitespace.
*
* @param str the String to check (may be <code>null</code>)
* @return <code>true</code> if the String is not null and has length
* @see #hasLength(CharSequence)
*/
public static boolean hasLength(String str) {
return hasLength((CharSequence) str);
}
/**
* <p>Checks if a String is empty ("") or null.</p>
* <p/>
* <pre>
* StringUtils.isEmpty(null) = true
* StringUtils.isEmpty("") = true
* StringUtils.isEmpty(" ") = false
* StringUtils.isEmpty("bob") = false
* StringUtils.isEmpty(" bob ") = false
* </pre>
* <p/>
* <p>NOTE: This method changed in Lang version 2.0.
* It no longer trims the String.
* That functionality is available in isBlank().</p>
*
* @param str the String to check, may be null
* @return <code>true</code> if the String is empty or null
*/
public static boolean isEmpty(String str) {
return str == null || str.length() == 0;
}
/**
* <p>Checks if a String is not empty ("") and not null.</p>
* <p/>
* <pre>
* StringUtils.isNotEmpty(null) = false
* StringUtils.isNotEmpty("") = false
* StringUtils.isNotEmpty(" ") = true
* StringUtils.isNotEmpty("bob") = true
* StringUtils.isNotEmpty(" bob ") = true
* </pre>
*
* @param str the String to check, may be null
* @return <code>true</code> if the String is not empty and not null
*/
public static boolean isNotEmpty(String str) {
return !StringUtils.isEmpty(str);
}
/**
* <p>Checks if a String is whitespace, empty ("") or null.</p>
* <p/>
* <pre>
* StringUtils.isBlank(null) = true
* StringUtils.isBlank("") = true
* StringUtils.isBlank(" ") = true
* StringUtils.isBlank("bob") = false
* StringUtils.isBlank(" bob ") = false
* </pre>
*
* @param str the String to check, may be null
* @return <code>true</code> if the String is null, empty or whitespace
* @since 2.0
*/
public static boolean isBlank(String str) {
int strLen;
if (str == null || (strLen = str.length()) == 0) {
return true;
}
for (int i = 0; i < strLen; i++) {
if ((Character.isWhitespace(str.charAt(i)) == false)) {
return false;
}
}
return true;
}
/**
* <p>Checks if a String is not empty (""), not null and not whitespace only.</p>
* <p/>
* <pre>
* StringUtils.isNotBlank(null) = false
* StringUtils.isNotBlank("") = false
* StringUtils.isNotBlank(" ") = false
* StringUtils.isNotBlank("bob") = true
* StringUtils.isNotBlank(" bob ") = true
* </pre>
*
* @param str the String to check, may be null
* @return <code>true</code> if the String is
* not empty and not null and not whitespace
* @since 2.0
*/
public static boolean isNotBlank(String str) {
return !StringUtils.isBlank(str);
}
/**
* Performs the logic for the <code>split</code> and
* <code>splitPreserveAllTokens</code> methods that return a maximum array
* length.
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the separate character
* @param max the maximum number of elements to include in the
* array. A zero or negative value implies no limit.
* @param preserveAllTokens if <code>true</code>, adjacent separators are
* treated as empty token separators; if <code>false</code>, adjacent
* separators are treated as one separator.
* @return an array of parsed Strings, <code>null</code> if null String input
*/
private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
// Performance tuned for 2.0 (JDK1.4)
// Direct code is quicker than StringTokenizer.
// Also, StringTokenizer uses isSpace() not isWhitespace()
if (str == null) {
return null;
}
int len = str.length();
if (len == 0) {
return ArrayUtils.EMPTY_STRING_ARRAY;
}
List list = new ArrayList();
int sizePlus1 = 1;
int i = 0, start = 0;
boolean match = false;
boolean lastMatch = false;
if (separatorChars == null) {
// Null separator means use whitespace
while (i < len) {
if (Character.isWhitespace(str.charAt(i))) {
if (match || preserveAllTokens) {
lastMatch = true;
if (sizePlus1++ == max) {
i = len;
lastMatch = false;
}
list.add(str.substring(start, i));
match = false;
}
start = ++i;
continue;
}
lastMatch = false;
match = true;
i++;
}
} else if (separatorChars.length() == 1) {
// Optimise 1 character case
char sep = separatorChars.charAt(0);
while (i < len) {
if (str.charAt(i) == sep) {
if (match || preserveAllTokens) {
lastMatch = true;
if (sizePlus1++ == max) {
i = len;
lastMatch = false;
}
list.add(str.substring(start, i));
match = false;
}
start = ++i;
continue;
}
lastMatch = false;
match = true;
i++;
}
} else {
// standard case
while (i < len) {
if (separatorChars.indexOf(str.charAt(i)) >= 0) {
if (match || preserveAllTokens) {
lastMatch = true;
if (sizePlus1++ == max) {
i = len;
lastMatch = false;
}
if ("".equals(str.substring(start, i)) == false)
list.add(str.substring(start, i));
match = false;
}
start = ++i;
continue;
}
lastMatch = false;
match = true;
i++;
}
}
if (match || (preserveAllTokens && lastMatch)) {
list.add(str.substring(start, i));
}
return (String[]) list.toArray(new String[list.size()]);
}
/**
* <p>Splits the provided text into an array with a maximum length,
* separators specified, preserving all tokens, including empty tokens
* created by adjacent separators.</p>
* <p/>
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* Adjacent separators are treated as one separator.</p>
* <p/>
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
* <p/>
* <p>If more than <code>max</code> delimited substrings are found, the last
* returned string includes all characters after the first <code>max - 1</code>
* returned strings (including separator characters).</p>
* <p/>
* <pre>
* StringUtils.splitPreserveAllTokens(null, *, *) = null
* StringUtils.splitPreserveAllTokens("", *, *) = []
* StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 2) = ["ab", " de fg"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 3) = ["ab", "", " de fg"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 4) = ["ab", "", "", "de fg"]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @param max the maximum number of elements to include in the
* array. A zero or negative value implies no limit
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) {
return splitWorker(str, separatorChars, max, true);
}
/**
* <p>Splits the provided text into an array, separators specified,
* preserving all tokens, including empty tokens created by adjacent
* separators. This is an alternative to using StringTokenizer.</p>
* <p/>
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* For more control over the split use the StrTokenizer class.</p>
* <p/>
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
* <p/>
* <pre>
* StringUtils.splitPreserveAllTokens(null, *) = null
* StringUtils.splitPreserveAllTokens("", *) = []
* StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"]
* StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"]
* StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "", def"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""]
* StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
* StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", cd", "ef"]
* StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", cd", "ef"]
* StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", cd", "ef"]
* StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", cd", "ef", ""]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(String str, String separatorChars) {
return splitWorker(str, separatorChars, -1, true);
}
/**
* Take a String which is a delimited list and convert it to a String array.
* <p>A single delimiter can consists of more than one character: It will still
* be considered as single delimiter string, rather than as bunch of potential
* delimiter characters - in contrast to <code>tokenizeToStringArray</code>.
*
* @param str the input String
* @param delimiter the delimiter between elements (this is a single delimiter,
* rather than a bunch individual delimiter characters)
* @return an array of the tokens in the list
*/
public static String[] delimitedListToStringArray(String str, String delimiter) {
return delimitedListToStringArray(str, delimiter, null);
}
/**
* Take a String which is a delimited list and convert it to a String array.
* <p>A single delimiter can consists of more than one character: It will still
* be considered as single delimiter string, rather than as bunch of potential
* delimiter characters - in contrast to <code>tokenizeToStringArray</code>.
*
* @param str the input String
* @param delimiter the delimiter between elements (this is a single delimiter,
* rather than a bunch individual delimiter characters)
* @param charsToDelete a set of characters to delete. Useful for deleting unwanted
* line breaks: e.g. "\r\n\f" will delete all new lines and line feeds in a String.
* @return an array of the tokens in the list
*/
public static String[] delimitedListToStringArray(String str, String delimiter, String charsToDelete) {
if (str == null) {
return new String[0];
}
if (delimiter == null) {
return new String[]{str};
}
List<String> result = new ArrayList<String>();
if ("".equals(delimiter)) {
for (int i = 0; i < str.length(); i++) {
result.add(deleteAny(str.substring(i, i + 1), charsToDelete));
}
} else {
int pos = 0;
int delPos;
while ((delPos = str.indexOf(delimiter, pos)) != -1) {
result.add(deleteAny(str.substring(pos, delPos), charsToDelete));
pos = delPos + delimiter.length();
}
if (str.length() > 0 && pos <= str.length()) {
// 문자열의 나머지를 추가한다. 빈 문자열인 경우 추가하지 않는다.
result.add(deleteAny(str.substring(pos), charsToDelete));
}
}
return toStringArray(result);
}
/**
* 지정한 문자열 콜렉션을 문자열 배열로 복사한다.
* 콜렉션은 반드시 1개 이상의 엘리먼트가 포함되어야 하며 문자열이어야 한다.
*
* @param collection 복사할 콜렉션
* @return 문자열 배열 (콜렉션이 <tt>null인 경우 <tt>null</tt>)
*/
public static String[] toStringArray(Collection<String> collection) {
if (collection == null) {
return null;
}
return collection.toArray(new String[collection.size()]);
}
/**
* 콤마가 구분자인 CSV 문자열을 문자열 배열로 변환한다.
*
* @param str 콤마를 구분자로 가진 CSV 문자열
* @return 문자열 배열 또는 입력이 비어 있는 경우 빈 배열
*/
public static String[] commaDelimitedListToStringArray(String str) {
return delimitedListToStringArray(str, ",");
}
/**
* 지정한 문자열에서 문자를 삭제한다.
*
* @param inString 원본 문자열
* @param charsToDelete 삭제할 문자
* @return 결과 문자열
*/
public static String deleteAny(String inString, String charsToDelete) {
if (!hasLength(inString) || !hasLength(charsToDelete)) {
return inString;
}
StringBuilder builder = new StringBuilder();
for (int i = 0; i < inString.length(); i++) {
char c = inString.charAt(i);
if (charsToDelete.indexOf(c) == -1) {
builder.append(c);
}
}
return builder.toString();
}
}