// Copyright 2013 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.util;
import java.security.InvalidParameterException;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Static helpers for parsing bracketed arrays out of strings, normalizing
 * word forms and inspecting the characters a word is made of.
 *
 * <p>All methods work on UTF-16 {@code char} units and expect non-null
 * arguments unless stated otherwise.
 */
public class StringUtils {

    /** Normalization levels understood by {@link #normalize(String, Mode)}. */
    public enum Mode {
        /** The word is returned unchanged. */
        none,
        /** Bracket tokens such as {@code -LRB-} are replaced by the bracket character. */
        bracket,
        /** Like {@link #bracket}; additionally lower-cases and maps every digit to {@code '0'}. */
        lower,
        /** Like {@link #lower}; additionally rewrites sharp s and o/u/a umlauts as "ss", "oe", "ue", "ae". */
        umlaut,
    }

    /** Length of every key in {@link #BRACKET_MAP} (for example {@code -LRB-}). */
    private static final int BRACKET_TOKEN_LENGTH = 5;

    /** No-break space (U+00A0, decimal 160); {@link Character#isWhitespace(char)} does not cover it. */
    private static final char NO_BREAK_SPACE = '\u00A0';

    /** Matches any single character outside the ASCII range; compiled once instead of per call. */
    private static final Pattern NON_ASCII = Pattern.compile("[^\\p{ASCII}]");

    /**
     * Parses the next bracketed, comma-separated list in {@code array_string}
     * as an array of doubles.
     *
     * @param array_string text containing a {@code [..]} list
     * @param start_index  holds the offset at which the search for {@code '['}
     *                     starts; updated by {@link #parseArray} to the offset
     *                     just behind the closing {@code ']'}
     * @return the parsed values, possibly empty
     * @throws InvalidParameterException if no bracket pair is found
     * @throws NumberFormatException     if an element is not a valid double
     */
    public static double[] parseDoubleArray(String array_string,
            Mutable<Integer> start_index) {
        String[] element_strings = parseArray(array_string, start_index);
        double[] array = new double[element_strings.length];
        for (int i = 0; i < element_strings.length; i++) {
            // parseDouble yields the primitive directly; no boxing round trip.
            array[i] = Double.parseDouble(element_strings[i]);
        }
        return array;
    }

    /**
     * Extracts the elements of the next {@code [a,b,c]} list in
     * {@code array_string}.
     *
     * @param array_string text containing a {@code [..]} list
     * @param index        holds the offset at which the search for {@code '['}
     *                     starts; on success it is set to the offset just
     *                     behind the closing {@code ']'}
     * @return the substrings between the brackets split at {@code ','};
     *         an empty array if nothing is between the brackets
     * @throws InvalidParameterException if {@code '['} or a following
     *                                   {@code ']'} cannot be found
     */
    public static String[] parseArray(String array_string,
            Mutable<Integer> index) {
        int start_index = array_string.indexOf('[', index.get());
        int end_index = array_string.indexOf(']', start_index);
        if (start_index == -1 || end_index == -1) {
            throw new InvalidParameterException("Not an array: " + array_string);
        }
        String content = array_string.substring(start_index + 1, end_index);
        index.set(end_index + 1);
        if (content.length() == 0) {
            // split(",") on "" would yield one empty element instead of none.
            return new String[0];
        }
        return content.split(",");
    }

    /**
     * Returns {@code form} with its characters in reverse order, as produced
     * by {@link StringBuilder#reverse()}.
     */
    public static String reverse(String form) {
        return new StringBuilder(form).reverse().toString();
    }

    /** Maps five-character bracket tokens to the bracket character they stand for. */
    static final Map<String, Character> BRACKET_MAP = new HashMap<String, Character>();
    static {
        BRACKET_MAP.put("-LRB-", '(');
        BRACKET_MAP.put("-RRB-", ')');
        BRACKET_MAP.put("-LCB-", '{');
        BRACKET_MAP.put("-RCB-", '}');
        BRACKET_MAP.put("-LSB-", '[');
        BRACKET_MAP.put("-RSB-", ']');
    }

    /**
     * Normalizes {@code word} according to {@code mode}.
     *
     * <p>For every mode other than {@link Mode#none}, bracket tokens listed in
     * {@link #BRACKET_MAP} are replaced by their bracket character. For
     * {@link Mode#lower} and {@link Mode#umlaut} each character is
     * lower-cased and each digit becomes {@code '0'}. For {@link Mode#umlaut}
     * the (lower-cased) sharp s and o/u/a umlauts are written as "ss", "oe",
     * "ue" and "ae".
     *
     * @param word the word to normalize
     * @param mode the normalization level; {@code null} is treated like
     *             {@link Mode#none}
     * @return the normalized word; {@code word} itself if nothing is to be done
     */
    public static String normalize(String word, Mode mode) {
        if (mode == null || mode == Mode.none) {
            return word;
        }
        boolean lowercase = (mode == Mode.lower || mode == Mode.umlaut);
        boolean transliterate = (mode == Mode.umlaut);
        int length = word.length();
        StringBuilder sb = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            char c = word.charAt(index);
            // Only look up a token if a complete one still fits into the word.
            if (c == '-' && index + BRACKET_TOKEN_LENGTH <= length) {
                Character bracket_char = BRACKET_MAP.get(
                        word.substring(index, index + BRACKET_TOKEN_LENGTH));
                if (bracket_char != null) {
                    c = bracket_char;
                    // Skip the rest of the token; the final index++ consumes
                    // its last character.
                    index += BRACKET_TOKEN_LENGTH - 1;
                }
            }
            if (lowercase) {
                c = Character.toLowerCase(c);
                if (Character.isDigit(c)) {
                    c = '0';
                }
            }
            if (transliterate) {
                appendTransliterated(sb, c);
            } else {
                sb.append(c);
            }
            index++;
        }
        return sb.toString();
    }

    /**
     * Appends {@code c} to {@code sb}, writing sharp s and the lower-case
     * o/u/a umlauts as two ASCII letters. Unicode escapes keep the source
     * independent of the compiler's file encoding.
     */
    private static void appendTransliterated(StringBuilder sb, char c) {
        switch (c) {
        case '\u00df': // sharp s
            sb.append("ss");
            break;
        case '\u00f6': // o umlaut
            sb.append("oe");
            break;
        case '\u00fc': // u umlaut
            sb.append("ue");
            break;
        case '\u00e4': // a umlaut
            sb.append("ae");
            break;
        default:
            sb.append(c);
        }
    }

    /** Concatenates {@code segments} in iteration order without a separator. */
    public static String join(List<String> segments) {
        StringBuilder sb = new StringBuilder();
        for (String segment : segments) {
            sb.append(segment);
        }
        return sb.toString();
    }

    /**
     * Returns a copy of {@code input} in which every whitespace character
     * (per {@link Character#isWhitespace(char)}) and every no-break space is
     * replaced by a plain space. The length is unchanged.
     */
    public static String clean(String input) {
        StringBuilder sb = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (Character.isWhitespace(c) || c == NO_BREAK_SPACE) {
                c = ' ';
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /** Returns whether {@code word} contains at least one upper-case character. */
    public static boolean containsUpperCase(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (Character.isUpperCase(word.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code word} contains at least one lower-case character. */
    public static boolean containsLowerCase(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (Character.isLowerCase(word.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code word} contains at least one digit (per {@link Character#isDigit(char)}). */
    public static boolean containsDigit(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (Character.isDigit(word.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code word} contains the character {@code '-'}. */
    public static boolean containsHyphon(String word) {
        return word.indexOf('-') >= 0;
    }

    /** Returns whether {@code word} contains a character that is neither a letter nor a digit. */
    public static boolean containsSpecial(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (isSpecial(word.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** A character is "special" if it is neither a letter nor a digit. */
    private static boolean isSpecial(char c) {
        return !(Character.isLetter(c) || Character.isDigit(c));
    }

    /**
     * Reduces {@code form} to ASCII: the string is decomposed (NFD) and every
     * remaining non-ASCII character is dropped. Accented letters thereby keep
     * their base letter; characters without an ASCII base disappear.
     */
    public static String asciify(String form) {
        String decomposed = Normalizer.normalize(form, Normalizer.Form.NFD);
        return NON_ASCII.matcher(decomposed).replaceAll("");
    }

    /** Capitalization pattern of a word as computed by {@link #getShape(String)}. */
    public enum Shape {
        FirstCap, AllCap, Lower, Mixed, NoLetter;
    }

    /**
     * Classifies the capitalization pattern of {@code word}.
     *
     * <ul>
     * <li>{@link Shape#NoLetter}: the word contains no letter (this includes
     * the empty string).</li>
     * <li>{@link Shape#AllCap}: it contains letters, none of them lower-case.</li>
     * <li>{@link Shape#FirstCap}: its first character is upper-case and is its
     * only upper-case letter.</li>
     * <li>{@link Shape#Mixed}: any other word with an upper-case letter.</li>
     * <li>{@link Shape#Lower}: everything else.</li>
     * </ul>
     */
    public static Shape getShape(String word) {
        int num_letter = 0;
        int num_lower = 0;
        int num_upper = 0;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (!Character.isLetter(c)) {
                continue;
            }
            num_letter++;
            if (Character.isLowerCase(c)) {
                num_lower++;
            }
            if (Character.isUpperCase(c)) {
                num_upper++;
            }
        }
        if (num_letter == 0) {
            return Shape.NoLetter;
        }
        if (num_lower == 0) {
            return Shape.AllCap;
        }
        // num_letter > 0 guarantees a non-empty word, so charAt(0) is safe.
        boolean first_cap = Character.isUpperCase(word.charAt(0));
        if (first_cap && num_upper == 1) {
            return Shape.FirstCap;
        }
        return (num_upper > 0) ? Shape.Mixed : Shape.Lower;
    }

    /**
     * Returns {@code word} with its first character converted to upper case.
     * An empty word is returned unchanged.
     */
    public static String capitalize(String word) {
        if (word.isEmpty()) {
            // There is no first character to convert.
            return word;
        }
        StringBuilder sb = new StringBuilder(word);
        sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
        return sb.toString();
    }

    /** Returns whether {@code word} contains at least one letter (per {@link Character#isLetter(char)}). */
    public static boolean containsLetter(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (Character.isLetter(word.charAt(i))) {
                return true;
            }
        }
        return false;
    }
}