// Copyright 2014 Michel Kraemer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.undercouch.citeproc.helper;
/**
 * Helper methods related to Strings. All methods are static and keep
 * no state between calls.
 * @author Michel Kraemer
 */
public class StringHelper {
    /**
     * Hexadecimal characters (upper case), indexed by nibble value
     */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Sanitizes a string so it can be used as an identifier. A fixed set
     * of accented Latin letters (grave, acute, tilde, diaeresis and caron
     * forms) is replaced by the respective base letter, the sharp s is
     * replaced by "ss", ASCII letters and digits are kept, and every
     * other character (including accented letters that are not in the
     * table, e.g. circumflex forms) is replaced by an underscore.
     * @param s the string to sanitize (may be <code>null</code>)
     * @return the sanitized string or <code>null</code> if the given
     * string was <code>null</code>
     */
    public static String sanitize(String s) {
        // behave like escapeJava() instead of failing with a
        // NullPointerException
        if (s == null) {
            return null;
        }

        // at most one input character (sharp s) expands to two characters
        StringBuilder sb = new StringBuilder(s.length() + 2);
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            switch (c) {
            case '\u00c0':
            case '\u00c1':
            case '\u00c3':
            case '\u00c4':
                sb.append('A');
                break;

            case '\u00c8':
            case '\u00c9':
            case '\u00cb':
                sb.append('E');
                break;

            case '\u00cc':
            case '\u00cd':
            case '\u00cf':
                sb.append('I');
                break;

            case '\u00d2':
            case '\u00d3':
            case '\u00d5':
            case '\u00d6':
                sb.append('O');
                break;

            case '\u00d9':
            case '\u00da':
            case '\u00dc':
                sb.append('U');
                break;

            case '\u00e0':
            case '\u00e1':
            case '\u00e3':
            case '\u00e4':
                sb.append('a');
                break;

            case '\u00e8':
            case '\u00e9':
            case '\u00eb':
                sb.append('e');
                break;

            case '\u00ec':
            case '\u00ed':
            case '\u00ef':
                sb.append('i');
                break;

            case '\u00f2':
            case '\u00f3':
            case '\u00f5':
            case '\u00f6':
                sb.append('o');
                break;

            case '\u00f9':
            case '\u00fa':
            case '\u00fc':
                sb.append('u');
                break;

            case '\u00d1':
                sb.append('N');
                break;

            case '\u00f1':
                sb.append('n');
                break;

            case '\u010c':
                sb.append('C');
                break;

            case '\u0160':
                sb.append('S');
                break;

            case '\u017d':
                sb.append('Z');
                break;

            case '\u010d':
                sb.append('c');
                break;

            case '\u0161':
                sb.append('s');
                break;

            case '\u017e':
                sb.append('z');
                break;

            case '\u00df':
                sb.append("ss");
                break;

            default:
                if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                        (c >= '0' && c <= '9')) {
                    sb.append(c);
                } else {
                    // anything that is not an ASCII letter or digit
                    sb.append('_');
                }
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Escapes characters in the given string according to Java rules.
     * Backspace, newline, tab, form feed, carriage return, backslash and
     * double quote are replaced by their two-character escape sequences.
     * Every other character below 32 or above 0x7f is replaced by a
     * Unicode escape with four upper-case hexadecimal digits. All
     * remaining characters (including 0x7f itself) are copied unchanged.
     * @param s the string to escape (may be <code>null</code>)
     * @return the escaped string or <code>null</code> if the given
     * string was <code>null</code>
     */
    public static String escapeJava(String s) {
        if (s == null) {
            return null;
        }

        // reserve some head room for escape sequences; Math.max keeps the
        // capacity valid even if the multiplication overflows
        StringBuilder sb = new StringBuilder(Math.max(2, s.length() * 3 / 2));
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            if (c == '\b') {
                sb.append("\\b");
            } else if (c == '\n') {
                sb.append("\\n");
            } else if (c == '\t') {
                sb.append("\\t");
            } else if (c == '\f') {
                sb.append("\\f");
            } else if (c == '\r') {
                sb.append("\\r");
            } else if (c == '\\') {
                sb.append("\\\\");
            } else if (c == '"') {
                sb.append("\\\"");
            } else if (c < 32 || c > 0x7f) {
                sb.append("\\u");
                sb.append(hex4(c));
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Converts the given character to a four-digit hexadecimal string
     * (upper case, padded with leading zeros)
     * @param c the character to convert
     * @return the string
     */
    private static String hex4(char c) {
        char[] r = new char[4];
        // fill from the least significant nibble backwards
        for (int i = r.length - 1; i >= 0; --i) {
            r[i] = HEX_DIGITS[c & 0xF];
            c >>>= 4;
        }
        return new String(r);
    }
}