package org.jabref.model.strings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.CharMatcher;
import org.apache.commons.lang3.StringUtils;
/**
 * Collection of static helper methods for quoting, wrapping, tokenizing and otherwise
 * manipulating strings.
 */
public class StringUtil {

    // Non-letters which are used to denote accents in LaTeX-commands, e.g., in {\"{a}}
    public static final String SPECIAL_COMMAND_CHARS = "\"`^~'=.|";

    // Matches every style of line break: Windows (CR LF), legacy Macintosh (CR) and Unix (LF).
    // CR LF is listed first so that it is consumed as one single line break.
    private static final Pattern LINE_BREAKS = Pattern.compile("\\r\\n|\\r|\\n");
    // One or more ASCII capitals enclosed in exactly one pair of curly braces, e.g. "{AB}"
    private static final Pattern BRACED_TITLE_CAPITAL_PATTERN = Pattern.compile("\\{[A-Z]+\\}");
    private static final UnicodeToReadableCharMap UNICODE_CHAR_MAP = new UnicodeToReadableCharMap();

    // Sentinel returned by parseDecimalInt; lies outside the int range, so it can never be a valid result
    private static final long NOT_AN_INT = Long.MIN_VALUE;

    /**
     * Converts a boolean into its binary string representation.
     *
     * @param expression the value to convert
     * @return "1" if expression is true, "0" otherwise
     */
    public static String booleanToBinaryString(boolean expression) {
        return expression ? "1" : "0";
    }

    /**
     * Quote special characters.
     *
     * @param toQuote   The String which may contain special characters.
     * @param specials  A String containing all special characters except the quoting
     *                  character itself, which is automatically quoted. May be null.
     * @param quoteChar The quoting character.
     * @return A String with every special character (including the quoting
     * character itself) quoted. The empty String if toQuote is null.
     */
    public static String quote(String toQuote, String specials, char quoteChar) {
        if (toQuote == null) {
            return "";
        }

        StringBuilder result = new StringBuilder();
        for (int i = 0; i < toQuote.length(); ++i) {
            char c = toQuote.charAt(i);
            boolean isSpecial = (c == quoteChar) || ((specials != null) && (specials.indexOf(c) >= 0));
            if (isSpecial) {
                result.append(quoteChar);
            }
            result.append(c);
        }
        return result.toString();
    }

    /**
     * Extracts the token that follows the character at startIndex.
     * <p>
     * Scanning starts at startIndex + 1 and skips leading whitespace. Characters are then collected
     * (keeping track of the nesting depth of curly braces) until the end of the text is reached, a
     * closing brace without a matching opening brace is found, or - unless terminateOnEndBraceOnly
     * is set - a whitespace character is found outside of any braces.
     *
     * @param text                    the text to extract the token from
     * @param startIndex              index of the character directly before the region to scan
     * @param terminateOnEndBraceOnly if true, whitespace does not terminate the token
     * @return the collected token; the terminating character is not included
     */
    public static String getPart(String text, int startIndex, boolean terminateOnEndBraceOnly) {
        int count = 0;
        StringBuilder part = new StringBuilder();

        // advance to first char and skip whitespace
        int index = startIndex + 1;
        while ((index < text.length()) && Character.isWhitespace(text.charAt(index))) {
            index++;
        }

        // then grab whatever is the first token (counting braces)
        while (index < text.length()) {
            char c = text.charAt(index);
            if (!terminateOnEndBraceOnly && (count == 0) && Character.isWhitespace(c)) {
                // end argument and leave whitespace for further processing
                break;
            }
            if ((c == '}') && (--count < 0)) {
                // closing brace without matching opening brace ends the token
                break;
            } else if (c == '{') {
                count++;
            }
            part.append(c);
            index++;
        }
        return part.toString();
    }

    /**
     * Returns the string, after shaving off whitespace at the beginning and end,
     * and removing (at most) one pair of braces or " surrounding it.
     *
     * @param toShave the string to shave, may be null
     * @return the shaved string; the empty String if toShave is null or empty
     */
    public static String shaveString(String toShave) {
        if ((toShave == null) || (toShave.isEmpty())) {
            return "";
        }
        String shaved = toShave.trim();
        if (isInCurlyBrackets(shaved) || isInCitationMarks(shaved)) {
            return shaved.substring(1, shaved.length() - 1);
        }
        return shaved;
    }

    /**
     * Concatenate all strings in the array from index 'from' to 'to' (excluding
     * to) with the given separator.
     * <p>
     * Example:
     * <p>
     * String[] s = "ab/cd/ed".split("/"); join(s, "\\", 0, s.length) ->
     * "ab\\cd\\ed"
     * <p>
     * Indices outside of the array are clamped to the array bounds. If the resulting
     * range is empty, the empty String is returned.
     *
     * @param strings   the strings to join
     * @param separator the separator put between two consecutive strings
     * @param from      index of the first string to include
     * @param to        Excluding strings[to]
     * @return the joined String, or the empty String if the range contains no element
     */
    public static String join(String[] strings, String separator, int from, int to) {
        int updatedFrom = Math.max(from, 0);
        int updatedTo = Math.min(strings.length, to);

        // The emptiness check has to happen after clamping: otherwise a range lying completely
        // outside of the array would access strings[updatedTo - 1] although nothing was selected.
        if (updatedFrom >= updatedTo) {
            return "";
        }

        StringBuilder stringBuilder = new StringBuilder();
        for (int i = updatedFrom; i < (updatedTo - 1); i++) {
            stringBuilder.append(strings[i]).append(separator);
        }
        return stringBuilder.append(strings[updatedTo - 1]).toString();
    }

    /**
     * Removes optional square brackets from the string s
     *
     * @param toStrip the string to strip, may be null
     * @return toStrip without its first and last character if it starts with '[' and ends with ']';
     * toStrip itself otherwise
     */
    public static String stripBrackets(String toStrip) {
        if (isInSquareBrackets(toStrip)) {
            return toStrip.substring(1, toStrip.length() - 1);
        }
        return toStrip;
    }

    /**
     * extends the filename with a default Extension, if no Extension '.x' could
     * be found
     *
     * @param orgName          the file name to check, may be null
     * @param defaultExtension the extension (without leading dot) to append if necessary
     * @return the empty String if orgName is null; orgName itself if it already ends with the default
     * extension (ignoring case) or contains a dot after its first character; orgName + "." +
     * defaultExtension otherwise
     */
    public static String getCorrectFileName(String orgName, String defaultExtension) {
        if (orgName == null) {
            return "";
        }

        if (orgName.toLowerCase(Locale.ROOT).endsWith("." + defaultExtension.toLowerCase(Locale.ROOT))) {
            return orgName;
        }

        // A dot at index 0 is not treated as extension separator: hidden files Linux/Unix (?)
        int hiddenChar = orgName.indexOf('.', 1);
        if (hiddenChar < 1) {
            return orgName + "." + defaultExtension;
        }

        return orgName;
    }

    /**
     * Formats field contents for output. Intended to be "symmetric" with the parsing of field
     * contents, so stored and reloaded fields are not mangled.
     * <p>
     * The input is split at line feeds. The first line is wrapped as is. Every following blank line
     * contributes one newline followed by a tab; every following non-blank line is preceded by two
     * such newline/tab sequences and is then wrapped. Trailing whitespace of each line is removed
     * before wrapping.
     *
     * @param in         the field content to format
     * @param wrapAmount the offset from the beginning of a line after which the next space is
     *                   replaced by a line break
     * @param newline    the line break to insert
     * @return the wrapped String.
     */
    public static String wrap(String in, int wrapAmount, String newline) {
        String[] lines = in.split("\n");
        StringBuilder result = new StringBuilder();

        if (lines.length == 0) {
            // String.split drops trailing empty strings, thus an input consisting only of line feeds
            // yields no line at all. There is nothing to wrap in this case.
            return "";
        }

        // remove all whitespace at the end of the string, this especially includes \r created when the field content has \r\n as line separator
        addWrappedLine(result, CharMatcher.WHITESPACE.trimTrailingFrom(lines[0]), wrapAmount, newline);
        for (int i = 1; i < lines.length; i++) {
            if (lines[i].trim().isEmpty()) {
                result.append(newline);
                result.append('\t');
            } else {
                result.append(newline);
                result.append('\t');
                result.append(newline);
                result.append('\t');
                // remove all whitespace at the end of the string, this especially includes \r created when the field content has \r\n as line separator
                String line = CharMatcher.WHITESPACE.trimTrailingFrom(lines[i]);
                addWrappedLine(result, line, wrapAmount, newline);
            }
        }
        return result.toString();
    }

    /**
     * Appends the given line to result and replaces spaces by line breaks so that the line is wrapped.
     *
     * @param result     the builder the wrapped line is appended to
     * @param line       the line to append
     * @param wrapAmount the offset from the current line start after which the next space is replaced
     * @param newline    the line break to insert; it is always followed by a tab
     */
    private static void addWrappedLine(StringBuilder result, String line, int wrapAmount, String newline) {
        // Set our pointer to the beginning of the new line in the StringBuilder:
        int length = result.length();
        // Add the line, unmodified:
        result.append(line);

        while (length < result.length()) {
            // first space at or after the wrap position of the current line
            int current = result.indexOf(" ", length + wrapAmount);
            if ((current < 0) || (current >= result.length())) {
                break;
            }

            result.deleteCharAt(current);
            result.insert(current, newline + "\t");
            // The pointer now references the inserted tab, i.e., the tab counts for the width of the next line
            length = current + newline.length();
        }
    }

    /**
     * Quotes each and every character as numeric character reference, e.g. '!' as {@code &#33;}.
     * Used for verbatim display of arbitrary strings that may contain HTML entities.
     *
     * @param toQuote the String to quote
     * @return a String consisting of one numeric character reference per char of toQuote
     */
    public static String quoteForHTML(String toQuote) {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < toQuote.length(); ++i) {
            // "&#" + decimal code + ";" forms a numeric character reference
            result.append("&#").append((int) toQuote.charAt(i)).append(';');
        }
        return result.toString();
    }

    /**
     * Decodes an encoded double String array back into array form. The array
     * is assumed to be square, and delimited by the characters ';' (first dim) and
     * ':' (second dim). A backslash escapes the character following it, so that this
     * character is taken literally.
     *
     * @param value The encoded String to be decoded.
     * @return The decoded String array.
     */
    public static String[][] decodeStringDoubleArray(String value) {
        List<List<String>> newList = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        List<String> thisEntry = new ArrayList<>();
        boolean escaped = false;

        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (!escaped && (c == '\\')) {
                // the backslash itself is dropped, the next character is taken literally
                escaped = true;
                continue;
            } else if (!escaped && (c == ':')) {
                thisEntry.add(sb.toString());
                sb = new StringBuilder();
            } else if (!escaped && (c == ';')) {
                thisEntry.add(sb.toString());
                sb = new StringBuilder();
                newList.add(thisEntry);
                thisEntry = new ArrayList<>();
            } else {
                sb.append(c);
            }
            escaped = false;
        }

        // Add content which is not terminated by a delimiter
        if (sb.length() > 0) {
            thisEntry.add(sb.toString());
        }
        if (!thisEntry.isEmpty()) {
            newList.add(thisEntry);
        }

        // Convert to String[][]:
        String[][] res = new String[newList.size()][];
        for (int i = 0; i < res.length; i++) {
            res[i] = newList.get(i).toArray(new String[0]);
        }
        return res;
    }

    /**
     * Wrap all uppercase letters, or sequences of uppercase letters, in curly
     * braces. Ignore letters within a pair of # character, as these are part of
     * a string label that should not be modified. Letters which are already
     * enclosed in braces are not wrapped again.
     *
     * @param s The string to modify.
     * @return The resulting string after wrapping capitals.
     */
    public static String putBracesAroundCapitals(String s) {
        boolean inString = false;
        boolean isBracing = false;
        boolean escaped = false;
        int inBrace = 0;
        StringBuilder buf = new StringBuilder();

        for (int i = 0; i < s.length(); i++) {
            // Update variables based on special characters:
            char c = s.charAt(i);
            if (c == '{') {
                inBrace++;
            } else if (c == '}') {
                inBrace--;
            } else if (!escaped && (c == '#')) {
                inString = !inString;
            }

            boolean isCapital = Character.isLetter(c) && Character.isUpperCase(c);

            // See if we should start bracing:
            if ((inBrace == 0) && !isBracing && !inString && isCapital) {
                buf.append('{');
                isBracing = true;
            }

            // See if we should close a brace set:
            if (isBracing && !isCapital) {
                buf.append('}');
                isBracing = false;
            }

            // Add the current character:
            buf.append(c);

            // Check if we are entering an escape sequence:
            escaped = (c == '\\') && !escaped;
        }

        // Check if we have an unclosed brace:
        if (isBracing) {
            buf.append('}');
        }
        return buf.toString();
    }

    /**
     * This method looks for occurrences of capital letters enclosed in an
     * arbitrary number of pairs of braces, e.g. "{AB}" or "{{T}}". All of these
     * pairs of braces are removed.
     *
     * @param s The String to analyze.
     * @return A new String with braces removed.
     */
    public static String removeBracesAroundCapitals(String s) {
        String current = s;
        String previous = s;
        // Each pass removes one level of braces; stop as soon as a pass does not shorten the string
        while ((current = removeSingleBracesAroundCapitals(current)).length() < previous.length()) {
            previous = current;
        }
        return current;
    }

    /**
     * This method looks for occurrences of capital letters enclosed in one pair
     * of braces, e.g. "{AB}". All these are replaced by only the capitals in
     * between the braces.
     *
     * @param s The String to analyze.
     * @return A new String with braces removed.
     */
    private static String removeSingleBracesAroundCapitals(String s) {
        Matcher mcr = BRACED_TITLE_CAPITAL_PATTERN.matcher(s);
        StringBuffer buf = new StringBuffer();
        while (mcr.find()) {
            String replaceStr = mcr.group();
            // The replacement consists of the letters A-Z only, so it needs no quoting
            mcr.appendReplacement(buf, replaceStr.substring(1, replaceStr.length() - 1));
        }
        mcr.appendTail(buf);
        return buf.toString();
    }

    /**
     * Replaces all platform-dependent line breaks by the given line break.
     *
     * We do NOT use UNIX line breaks as the user explicitly configures its linebreaks and this method is used in bibtex field writing
     *
     * <example>
     * Legacy Macintosh \r -> newline
     * Windows \r\n -> newline
     * </example>
     *
     * @param s       the String whose line breaks should be unified
     * @param newline the line break to use; it is inserted literally
     * @return a String with only newline as line breaks
     */
    public static String unifyLineBreaks(String s, String newline) {
        // quoteReplacement prevents that '$' and '\' in newline are interpreted as group reference or escape
        return LINE_BREAKS.matcher(s).replaceAll(Matcher.quoteReplacement(newline));
    }

    /**
     * Checks if the given String has exactly one pair of surrounding curly braces <br>
     * Strings with escaped characters in curly braces at the beginning and end are respected, too
     *
     * @param toCheck The string to check
     * @return True, if the check was successful. False otherwise.
     */
    public static boolean isInCurlyBrackets(String toCheck) {
        if ((toCheck == null) || toCheck.isEmpty()) {
            return false;
        }
        if ((toCheck.charAt(0) != '{') || (toCheck.charAt(toCheck.length() - 1) != '}')) {
            return false;
        }

        int count = 0; // number of brace groups opened at nesting depth 0
        int brackets = 0; // current nesting depth
        for (char c : toCheck.toCharArray()) {
            if (c == '{') {
                if (brackets == 0) {
                    count++;
                }
                brackets++;
            } else if (c == '}') {
                brackets--;
            }
        }
        return count == 1;
    }

    /**
     * Checks if the given String starts with '[' and ends with ']'.
     *
     * @param toCheck the string to check, may be null
     * @return true if toCheck is enclosed in square brackets; false otherwise
     */
    public static boolean isInSquareBrackets(String toCheck) {
        if ((toCheck == null) || toCheck.isEmpty()) {
            return false; // In case of null or empty string
        }
        return (toCheck.charAt(0) == '[') && (toCheck.charAt(toCheck.length() - 1) == ']');
    }

    /**
     * Checks if the given String starts and ends with a double quotation mark.
     *
     * @param toCheck the string to check, may be null
     * @return true if toCheck is enclosed in quotation marks; false otherwise
     */
    public static boolean isInCitationMarks(String toCheck) {
        if ((toCheck == null) || (toCheck.length() <= 1)) {
            return false; // In case of null, empty string, or a single citation mark
        }
        return (toCheck.charAt(0) == '"') && (toCheck.charAt(toCheck.length() - 1) == '"');
    }

    /**
     * Parses an optional '-' followed by one or more ASCII digits into an int value.
     *
     * @param str the String to parse, may be null
     * @return the parsed value, or {@link #NOT_AN_INT} if str is null, malformed or outside of the int range
     */
    private static long parseDecimalInt(String str) {
        if ((str == null) || str.isEmpty()) {
            return NOT_AN_INT;
        }

        int end = str.length();
        boolean negative = str.charAt(0) == '-';
        int idx = negative ? 1 : 0;
        if (idx == end) {
            // a sole minus sign
            return NOT_AN_INT;
        }

        // largest magnitude of any int, i.e. -Integer.MIN_VALUE
        long maxMagnitude = 1L + Integer.MAX_VALUE;
        long magnitude = 0;
        for (; idx < end; idx++) {
            char ch = str.charAt(idx);
            if ((ch < '0') || (ch > '9')) {
                return NOT_AN_INT;
            }
            magnitude = (magnitude * 10) + (ch - '0');
            if (magnitude > maxMagnitude) {
                // Stop early: the value cannot be an int anymore and the long must not overflow either
                return NOT_AN_INT;
            }
        }

        if (negative) {
            return -magnitude;
        }
        return (magnitude > Integer.MAX_VALUE) ? NOT_AN_INT : magnitude;
    }

    /**
     * Optimized method for converting a String into an Integer
     *
     * From http://stackoverflow.com/questions/1030479/most-efficient-way-of-converting-string-to-integer-in-java
     *
     * Accepts an optional leading '-' followed by ASCII digits only.
     *
     * @param str the String holding an Integer value
     * @throws NumberFormatException if str cannot be parsed to an int
     * @return the int value of str
     */
    public static int intValueOf(String str) {
        long parsed = parseDecimalInt(str);
        if (parsed == NOT_AN_INT) {
            throw new NumberFormatException(str);
        }
        return (int) parsed;
    }

    /**
     * Optimized method for converting a String into an Integer
     *
     * From http://stackoverflow.com/questions/1030479/most-efficient-way-of-converting-string-to-integer-in-java
     *
     * Accepts an optional leading '-' followed by ASCII digits only.
     *
     * @param str the String holding an Integer value
     * @return the int value of str or Optional.empty() if not possible
     */
    public static Optional<Integer> intValueOfOptional(String str) {
        long parsed = parseDecimalInt(str);
        if (parsed == NOT_AN_INT) {
            return Optional.empty();
        }
        return Optional.of((int) parsed);
    }

    /**
     * This method ensures that the output String has only
     * valid XML unicode characters as specified by the
     * XML 1.0 standard. For reference, please see
     * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
     * standard</a>. This method will return an empty
     * String if the input is null or empty.
     * <p>
     * URL: http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
     *
     * @param in The String whose non-valid characters we want to remove.
     * @return The in String, stripped of non-valid characters.
     */
    public static String stripNonValidXMLCharacters(String in) {
        if ((in == null) || in.isEmpty()) {
            return ""; // vacancy test.
        }

        StringBuilder out = new StringBuilder(in.length()); // Used to hold the output.
        // Iterate over code points instead of chars: characters beyond U+FFFF are valid XML characters,
        // but are represented by two surrogate chars, each of which is invalid on its own.
        for (int i = 0; i < in.length(); ) {
            int codePoint = in.codePointAt(i);
            i += Character.charCount(codePoint);
            if (isValidXMLCharacter(codePoint)) {
                out.appendCodePoint(codePoint);
            }
        }
        return out.toString();
    }

    /**
     * Checks whether the given code point matches the Char production of the XML 1.0 standard.
     *
     * @param codePoint the code point to check
     * @return true if the code point may appear in an XML 1.0 document
     */
    private static boolean isValidXMLCharacter(int codePoint) {
        return (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
                || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
                || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
                || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF));
    }

    /**
     * Splits the given String into tokens.
     * <p>
     * A line feed is appended to buf before tokenizing. Unless delimstr contains the line feed,
     * it therefore ends up as the end of the last token.
     *
     * @param buf      String to be tokenized
     * @param delimstr Delimiter string; each of its characters is a delimiter
     * @return list {@link java.util.List} of {@code String}
     */
    public static List<String> tokenizeToList(String buf, String delimstr) {
        List<String> list = new ArrayList<>();
        String buffer = buf + '\n';

        StringTokenizer st = new StringTokenizer(buffer, delimstr);

        while (st.hasMoreTokens()) {
            list.add(st.nextToken());
        }

        return list;
    }

    /**
     * Limits the length of the given String. A String which is too long is cut and
     * terminated by "..." so that the result has exactly maxLength characters.
     *
     * @param s         the String to limit, may be null
     * @param maxLength the maximum length of the result
     * @return the empty String if s is null; s itself if it is not longer than maxLength; the shortened
     * String otherwise. If maxLength is too small to hold the ellipsis, the ellipsis itself
     * is cut to maxLength characters.
     */
    public static String limitStringLength(String s, int maxLength) {
        if (s == null) {
            return "";
        }

        if (s.length() <= maxLength) {
            return s;
        }

        if (maxLength < 3) {
            // No room for the complete ellipsis; s.substring(0, maxLength - 3) would fail
            return "...".substring(0, Math.max(maxLength, 0));
        }

        return s.substring(0, maxLength - 3) + "...";
    }

    /**
     * Replace non-English characters like umlauts etc. with a sensible letter or letter combination that bibtex can
     * accept. The basis for replacement is the HashMap UnicodeToReadableCharMap.
     *
     * @param s the String to process
     * @return s with every key of the map replaced by the corresponding value
     */
    public static String replaceSpecialCharacters(String s) {
        String result = s;
        for (Map.Entry<String, String> chrAndReplace : UNICODE_CHAR_MAP.entrySet()) {
            result = result.replace(chrAndReplace.getKey(), chrAndReplace.getValue());
        }
        return result;
    }

    /**
     * Return a String with n spaces
     *
     * @param n Number of spaces
     * @return String with n spaces
     */
    public static String repeatSpaces(int n) {
        return repeat(n, ' ');
    }

    /**
     * Return a String with n copies of the char c
     *
     * @param n Number of copies
     * @param c char to copy
     * @return String with n copies of c; the empty String if n is not positive
     */
    public static String repeat(int n, char c) {
        if (n <= 0) {
            return "";
        }

        char[] copies = new char[n];
        Arrays.fill(copies, c);
        return new String(copies);
    }

    /**
     * @param toTest the String to test
     * @return true if toTest is null or has length 0
     */
    public static boolean isNullOrEmpty(String toTest) {
        return ((toTest == null) || toTest.isEmpty());
    }

    /**
     * @param string the String to test
     * @return the negation of {@link #isNotBlank(String)}
     */
    public static boolean isBlank(String string) {
        return !isNotBlank(string);
    }

    /**
     * @param string the Optional to test
     * @return the negation of {@link #isNotBlank(Optional)}
     */
    public static boolean isBlank(Optional<String> string) {
        return !isNotBlank(string);
    }

    /**
     * @param string the String to test
     * @return the result of {@code StringUtils.isNotBlank(string)}
     */
    public static boolean isNotBlank(String string) {
        return StringUtils.isNotBlank(string);
    }

    /**
     * @param string the Optional to test
     * @return true if a value is present and this value is not blank
     */
    public static boolean isNotBlank(Optional<String> string) {
        return string.isPresent() && isNotBlank(string.get());
    }

    /**
     * Return string enclosed in HTML bold tags
     */
    public static String boldHTML(String input) {
        return "<b>" + input + "</b>";
    }

    /**
     * Return string enclosed in HTML bold tags if not null, otherwise return alternative text in HTML bold tags
     */
    public static String boldHTML(String input, String alternative) {
        if (input == null) {
            return "<b>" + alternative + "</b>";
        }
        return "<b>" + input + "</b>";
    }

    /**
     * Unquote special characters.
     * <p>
     * The quoting character itself is dropped and the character following it is taken literally.
     * A quoted line feed is dropped, too.
     *
     * @param toUnquote The String which may contain quoted special characters.
     * @param quoteChar The quoting character.
     * @return A String with all quoted characters unquoted.
     */
    public static String unquote(String toUnquote, char quoteChar) {
        StringBuilder result = new StringBuilder();
        boolean quoted = false;
        for (int i = 0; i < toUnquote.length(); ++i) {
            char c = toUnquote.charAt(i);
            if (quoted) { // append literally...
                if (c != '\n') {
                    result.append(c);
                }
                quoted = false;
            } else if (c == quoteChar) {
                // quote char
                quoted = true;
            } else {
                result.append(c);
            }
        }
        return result.toString();
    }

    /**
     * @param searchQuery the String to process
     * @return the result of {@code StringUtils.stripAccents(searchQuery)}
     */
    public static String stripAccents(String searchQuery) {
        return StringUtils.stripAccents(searchQuery);
    }

    /**
     * Make first character of String uppercase, and the
     * rest lowercase.
     *
     * @param toCapitalize the String to convert
     * @return the converted String
     */
    public static String capitalizeFirst(String toCapitalize) {
        if (toCapitalize.length() > 1) {
            return toCapitalize.substring(0, 1).toUpperCase(Locale.ROOT)
                    + toCapitalize.substring(1).toLowerCase(Locale.ROOT);
        } else {
            return toCapitalize.toUpperCase(Locale.ROOT);
        }
    }

    /**
     * Returns a list of words contained in the given text.
     * Whitespace, comma and semicolon are considered as separator between words.
     * <p>
     * Note that the first element is the empty String if the text starts with a separator or is empty.
     *
     * @param text the input
     * @return a fixed-size list of words
     */
    public static List<String> getStringAsWords(String text) {
        return Arrays.asList(text.split("[\\s,;]+"));
    }

    /**
     * @param text         the text to search in
     * @param searchString the String to search for
     * @return the result of {@code StringUtils.containsIgnoreCase(text, searchString)}
     */
    public static boolean containsIgnoreCase(String text, String searchString) {
        return StringUtils.containsIgnoreCase(text, searchString);
    }
}