/* * #! * Ontopia Engine * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.utils; import java.io.Writer; import java.io.IOException; import java.security.MessageDigest; import java.util.Collection; import java.util.Iterator; import java.util.Random; import java.util.StringTokenizer; /** * INTERNAL: Class that contains useful string operation methods. */ public class StringUtils { private static Random rand = new Random(); // essential to only create it once /** * INTERNAL: A string used internally for various control flow * purposes. It is a string that is extremely unlikely to occur in * real-world data. */ public static final String VERY_UNLIKELY_STRING = "_________________VERY UNLIKELY STRING_____________"; /** * INTERNAL: Replaces occurrences of a string within a string, * returning a new string where the substitutions have been * performed. */ public static String replace(String str, String oldvalue, String newvalue) { int match = str.indexOf(oldvalue); while (match != -1) { // TODO: This is not very fast is it? str = str.substring(0, match) + newvalue + str.substring(match + oldvalue.length()); match = str.indexOf(oldvalue,match+newvalue.length()); } return str; } /** * INTERNAL: Replaces occurrences of a char within a string, * returning a new string where the substitutions have been * performed. * * @since 2.0 */ public static String replace(String str, char oldvalue, String newvalue) { int match = str.indexOf(oldvalue); while (match != -1) { str = str.substring(0, match) + newvalue + str.substring(match + 1); match = str.indexOf(oldvalue,match+newvalue.length()); } return str; } /** * INTERNAL: Splits a string on occurrences of a given * substring. The separator is " ". */ public static String[] split(String str) { return split(str, " "); } /** * INTERNAL: Splits a string on occurrences of a given substring. */ public static String[] split(String str, String separator) { int nPos = 0; int nStartPos = 0; int nIndex = 0; String[] splitArray = null; int nSplitResults = 1; // Count number of pieces while ((nPos = str.indexOf (separator, nStartPos)) != -1) { nStartPos = nPos + 1; nSplitResults++; } // Create array and set us up splitArray = new String[nSplitResults]; nPos = 0; nStartPos = 0; // Do the split for (nIndex = 0; nIndex < nSplitResults-1; nIndex++) { nPos = str.indexOf (separator, nStartPos); splitArray[nIndex] = str.substring (nStartPos, nPos); nStartPos = nPos + 1; } splitArray[nIndex] = str.substring (nStartPos); return splitArray; } /** * INTERNAL: Tokenizes a string on occurrences of a delimiters. This * method is effectively using a java.util.StringTokenizer * underneath. * * @since 3.1 */ public static String[] tokenize(String str, String delimiters) { String[] values = new String[1]; int c = 0; StringTokenizer st = new StringTokenizer(str, delimiters); while (st.hasMoreTokens()) { if (c > values.length-1) { // increase size of array String[] newValues = new String[(values.length * 3)/2 + 1]; System.arraycopy(values, 0, newValues, 0, values.length); values = newValues; } values[c] = st.nextToken(); c++; } if (c < values.length) { // reduce size of array String[] newValues = new String[c]; System.arraycopy(values, 0, newValues, 0, c); values = newValues; } return values; } /** * INTERNAL: Trims each individual string in the array. Note that * this method will effectively replace the strings in the * array. The string array is returned just for convenience. * * @since 3.1 */ public static String[] trim(String[] str) { for (int i=0; i < str.length; i++) { if (str[i] == null) continue; else str[i] = str[i].trim(); } return str; } /** * INTERNAL: Joins the objects in a collection (turned into strings * by toString) with a separator string. */ public static String join(Collection<?> objects, String separator) { if (objects.isEmpty()) return ""; Iterator<?> iter = objects.iterator(); StringBuilder list = new StringBuilder(); list.append(iter.next()); while (iter.hasNext()) { list.append(separator); list.append(iter.next()); } return list.toString(); } /** * INTERNAL: Joins the objects in a collection (turned into strings * by the stringifier) with a separator string. * * @since 2.0 */ public static <T> String join(Collection<T> objects, String separator, StringifierIF<T> stringifier) { if (objects.isEmpty()) return ""; Iterator<T> iter = objects.iterator(); StringBuilder list=new StringBuilder(); list.append(stringifier.toString(iter.next())); while (iter.hasNext()) { list.append(separator); list.append(stringifier.toString(iter.next())); } return list.toString(); } /** * INTERNAL: Joins the objects in an array (turned into strings by * toString) with a separator string. */ public static String join(Object[] objects, String separator) { if (objects.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(objects[0]); for (int ix = 1; ix < objects.length; ix++) { list.append(separator); list.append(objects[ix]); } return list.toString(); } /** * INTERNAL: Joins the objects in an array (turned into strings by * toString) with a separator string. * * @since 3.1 */ public static String join(Object[] objects, char separator) { if (objects.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(objects[0]); for (int ix = 1; ix < objects.length; ix++) { list.append(separator); list.append(objects[ix]); } return list.toString(); } /** * INTERNAL: Joins the ints in an array (turned into strings by * toString) with a separator string.<p> * * @since 1.3.4 */ public static String join(int[] vals, String separator) { if (vals.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(vals[0]); for (int ix = 1; ix < vals.length; ix++) { list.append(separator); list.append(vals[ix]); } return list.toString(); } /** * INTERNAL: Joins the longs in an array (turned into strings by * toString) with a separator string.<p> * * @since 2.0 */ public static String join(long[] vals, String separator) { if (vals.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(vals[0]); for (int ix = 1; ix < vals.length; ix++) { list.append(separator); list.append(vals[ix]); } return list.toString(); } /** * INTERNAL: Joins the booleans in an array (turned into strings by * toString) with a separator string.<p> * * @since 2.0 */ public static String join(boolean[] vals, String separator) { if (vals.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(vals[0]); for (int ix = 1; ix < vals.length; ix++) { list.append(separator); list.append(vals[ix]); } return list.toString(); } /** * INTERNAL: Joins the bytes in an array (turned into strings by * toString) with a separator string.<p> * * @since 2.0 */ public static String join(byte[] vals, String separator) { if (vals.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(vals[0]); for (int ix = 1; ix < vals.length; ix++) { list.append(separator); list.append(vals[ix]); } return list.toString(); } /** * INTERNAL: Joins the characters in an array (turned into strings * by toString) with a separator string.<p> * * @since 2.0 */ public static String join(char[] vals, String separator) { if (vals.length == 0) return ""; StringBuilder list = new StringBuilder(); list.append(vals[0]); for (int ix = 1; ix < vals.length; ix++) { list.append(separator); list.append(vals[ix]); } return list.toString(); } /** * INTERNAL: Joins the objects in a collection (turned into strings * by toString) with a separator string. The result is appended to * the specified StringBuilder. * * @since 1.3.2 */ public static void join(Collection<?> objects, String separator, StringBuilder sb) { if (objects.isEmpty()) return; Iterator<?> iter = objects.iterator(); sb.append(iter.next()); while (iter.hasNext()) { sb.append(separator); sb.append(iter.next()); } } /** * INTERNAL: Joins the objects in an array (turned into strings by * toString) with a separator string. The result is appended to the * specified StringBuilder. * * @since 1.3.2 */ public static void join(Object[] objects, String separator, StringBuilder sb) { if (objects.length == 0) return; sb.append(objects[0]); for (int ix = 1; ix < objects.length; ix++) { sb.append(separator); sb.append(objects[ix]); } } /** * INTERNAL: Joins the non-null objects in an array (turned into * strings by toString) with a separator string. Note that nulls in * the array are ignored. * * @since 1.2.5 */ public static String join(Object[] objects, String separator, boolean remove_nulls) { if (objects == null || objects.length == 0) return ""; if (remove_nulls) { StringBuilder list = new StringBuilder(); boolean subseq = false; for (int ix = 0; ix < objects.length; ix++) { if (objects[ix] != null) { if (subseq) list.append(separator); else subseq = true; list.append(objects[ix]); } } return list.toString(); } else { // Delegate to other join method if not set to remove nulls. return StringUtils.join(objects, separator); } } /** * INTERNAL: Compare two string values lexically. This method can * handle nulls values. Nulls sort lower than non-nulls.<p> * * @since 3.1 */ public static int compare(String s1, String s2) { if (s1 == null && s2 == null) return 0; else if (s1 == null) return -1; else if (s2 == null) return 1; else return s1.compareTo(s2); } /** * INTERNAL: Compare two string values lexically ignoring case. This * method can handle nulls values. Nulls sort lower than non-nulls.<p> * * @since 3.1 */ public static int compareToIgnoreCase(String s1, String s2) { if (s1 == null && s2 == null) return 0; else if (s1 == null) return -1; else if (s2 == null) return 1; else return s1.compareToIgnoreCase(s2); } /** * INTERNAL: Replaces sequences of one or more ' ', \t, \n, \r by a * single space, returning the new string. */ public static String normalizeWhitespace(String source) { char[] string = source.toCharArray(); int pos = 0; boolean previousWasWS = false; for (int ix = 0; ix < string.length; ix++) { switch (string[ix]) { case ' ': case '\t': case '\n': case '\r': previousWasWS = true; break; default: if (previousWasWS) { string[pos++] = ' '; previousWasWS = false; } string[pos++] = string[ix]; } } if (previousWasWS) string[pos++] = ' '; return new String(string, 0, pos); } /** * INTERNAL: Replaces sequences of one or more characters that * report as whitespace through Character.isWhitespace(char) by a * single space, returning the new string. * @since 3.3.0 */ public static String normalizeIsWhitespace(String source) { char[] string = source.toCharArray(); int pos = 0; boolean previousWasWS = false; for (int ix = 0; ix < string.length; ix++) { if (Character.isWhitespace(string[ix])) { previousWasWS = true; } else { if (previousWasWS) { string[pos++] = ' '; previousWasWS = false; } string[pos++] = string[ix]; } } if (previousWasWS) string[pos++] = ' '; return new String(string, 0, pos); } /** * INTERNAL: Test whether the string is equal to the given region of * the character array. */ public static boolean regionEquals(String str, char[] ch, int start, int length) { if (str.length() != length || start+length > ch.length) return false; char[] strarr = str.toCharArray(); for (int i=0; i<length; i++) if (ch[start+i] != strarr[i]) return false; return true; } /** * INTERNAL: Convert the A-Z characters in the array region to a-z. */ public static void downCaseAscii(char[] ch, int start, int length) { int end = start + length; for (int ix = start; ix < end; ix++) if (ch[ix] >= 'A' && ch[ix] <= 'Z') ch[ix] = (char) (ch[ix] | 0x20); // efficient downcase of char :-) } /** * INTERNAL: Transcode a string mistakenly interpreted as ISO 8859-1 * to one interpreted as UTF-8. */ public static String transcodeUTF8(String original) { try { byte raw[] = original.getBytes("8859_1"); return new String(raw, 0, raw.length, "UTF-8"); } catch (java.io.UnsupportedEncodingException e) { // This should never happen, but it doesn't hurt to be sure throw new OntopiaRuntimeException(e); } } /** * INTERNAL: Transcode a string mistakenly interpreted as ISO 8859-1 * to one interpreted as something else. */ public static String transcode(String original, String encoding) throws java.io.UnsupportedEncodingException { byte raw[] = original.getBytes("8859_1"); return new String(raw, 0, raw.length, encoding); } /** * INTERNAL: Filters the specified string for characters that are * senstive to HTML interpreters, returning the string with these * characters replaced by the corresponding character entities. * * @param value The string to be filtered and returned * * @since 1.3.1 */ public static String escapeHTMLEntities(String value) { if (value == null) return null; char content[] = new char[value.length()]; value.getChars(0, content.length, content, 0); StringBuilder result = new StringBuilder(content.length + 50); for (int i = 0; i < content.length; i++) { switch (content[i]) { case '<': result.append("<"); break; case '>': result.append(">"); break; case '&': result.append("&"); break; case '"': result.append("""); break; case '\'': result.append("'"); break; default: result.append(content[i]); } } return result.toString(); } /** * INTERNAL: Filters the specified string for characters that are * senstive to HTML interpreters, writing the string with these * characters replaced by the corresponding character entities to * the given writer. * * @param value The string to be filtered and written. * * @since 3.0 */ public static void escapeHTMLEntities(String value, Writer out) throws IOException { if (value == null) return; char content[] = new char[value.length()]; value.getChars(0, content.length, content, 0); for (int i = 0; i < content.length; i++) { switch (content[i]) { case '<': out.write("<"); break; case '>': out.write(">"); break; case '&': out.write("&"); break; case '"': out.write("""); break; case '\'': out.write("'"); break; default: out.write(content[i]); } } } /** * INTERNAL: Make a random ID-like string of the given number of * characters. */ public static String makeRandomId(int length) { char[] chars = new char[length]; for (int ix = 0; ix < length; ix++) chars[ix] = (char) (65 + rand.nextInt(26)); return new String(chars); } /** * INTERNAL: Pad characters to the given length. */ public static String pad(int number, char ch, int length) { return pad(Integer.toString(number), ch, length); } /** * INTERNAL: Pad with filler characters in front of a base string to * get the given length. */ public static String pad(String str, char ch, int length) { char[] result = new char[length]; for (int i=0, s=str.length(), p=length-s; i < length ; i++) { if (i < p) result[i] = ch; else result[i] = str.charAt(i-p); } return new String(result); } /** * INTERNAL: Returns a 32 character long hex encoded MD5 digest of * the given string. Hex letters are returned in lower case. */ public static String md5_32(String value) { try { MessageDigest md = MessageDigest.getInstance("MD5"); byte[] digest = md.digest(value.getBytes("UTF-8")); StringBuilder md5hash = new StringBuilder(digest.length * 2); for (int i=0; i < digest.length; i++) { String hex = Integer.toHexString(digest[i] & 0xFF); if (hex.length() == 1) md5hash.append('0'); md5hash.append(hex); } return md5hash.toString(); } catch (Exception e) { throw new OntopiaRuntimeException(e); } } /** * INTERNAL: Returns true if the string is a valid integer. */ public static boolean isInteger(String candidate) { try { Integer.parseInt(candidate); return true; } catch (NumberFormatException e) { return false; } } /** * INTERNAL: Creates a candidate ID from an input string. The * algorithm discards characters above U+00FF, strips accents off * remaining characters, then discards everything that doesn't match * the LTM NAME production (except leading characters, which turn * into underscores). Whitespace is normalized, and turns into * a hyphen when internal to the string. Letters are lowercased. */ public static String normalizeId(String name) { char[] buffer = name.toCharArray(); int outix = 0; boolean whitespacerun = false; boolean firstchar = false; for (int inix = 0; inix < buffer.length; inix++) { char ch = buffer[inix]; // discard high characters if (ch > 0x00FF) continue; // handle whitespace if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { if (!firstchar || whitespacerun) continue; ch = '-'; whitespacerun = true; } else whitespacerun = false; // check mapping table char mapsto = charmap[ch]; if (mapsto == 0 && firstchar) continue; // update buffer if (mapsto == 0) // discards before first NAME char turn into underscores buffer[outix++] = '_'; else { buffer[outix++] = mapsto; firstchar = true; } } // whitespace at end will leave a trailing '-', which needs to go if (whitespacerun) outix--; // leave out last '-' // check if we have a valid name start character first if (outix < 1) return null; else if (isNameStart(buffer[0])) return new String(buffer, 0, outix); else return "_" + new String(buffer, 0, outix); } private static boolean isNameStart(char ch) { return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_'; } // Accent-stripping and lowercasing map used for name->ID normalization // Auto-generated from unicodedata.txt static final char[] charmap = new char[]{ 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: SPACE 0, // discarded: EXCLAMATION MARK 0, // discarded: QUOTATION MARK 0, // discarded: NUMBER SIGN 0, // discarded: DOLLAR SIGN 0, // discarded: PERCENT SIGN 0, // discarded: AMPERSAND 0, // discarded: APOSTROPHE 0, // discarded: LEFT PARENTHESIS 0, // discarded: RIGHT PARENTHESIS 0, // discarded: ASTERISK 0, // discarded: PLUS SIGN 0, // discarded: COMMA 45, // untouched: HYPHEN-MINUS 46, // untouched: FULL STOP 0, // discarded: SOLIDUS 48, // untouched: DIGIT ZERO 49, // untouched: DIGIT ONE 50, // untouched: DIGIT TWO 51, // untouched: DIGIT THREE 52, // untouched: DIGIT FOUR 53, // untouched: DIGIT FIVE 54, // untouched: DIGIT SIX 55, // untouched: DIGIT SEVEN 56, // untouched: DIGIT EIGHT 57, // untouched: DIGIT NINE 0, // discarded: COLON 0, // discarded: SEMICOLON 0, // discarded: LESS-THAN SIGN 0, // discarded: EQUALS SIGN 0, // discarded: GREATER-THAN SIGN 0, // discarded: QUESTION MARK 0, // discarded: COMMERCIAL AT 97, // mapped: LATIN CAPITAL LETTER A -> LATIN SMALL LETTER A 98, // mapped: LATIN CAPITAL LETTER B -> LATIN SMALL LETTER B 99, // mapped: LATIN CAPITAL LETTER C -> LATIN SMALL LETTER C 100, // mapped: LATIN CAPITAL LETTER D -> LATIN SMALL LETTER D 101, // mapped: LATIN CAPITAL LETTER E -> LATIN SMALL LETTER E 102, // mapped: LATIN CAPITAL LETTER F -> LATIN SMALL LETTER F 103, // mapped: LATIN CAPITAL LETTER G -> LATIN SMALL LETTER G 104, // mapped: LATIN CAPITAL LETTER H -> LATIN SMALL LETTER H 105, // mapped: LATIN CAPITAL LETTER I -> LATIN SMALL LETTER I 106, // mapped: LATIN CAPITAL LETTER J -> LATIN SMALL LETTER J 107, // mapped: LATIN CAPITAL LETTER K -> LATIN SMALL LETTER K 108, // mapped: LATIN CAPITAL LETTER L -> LATIN SMALL LETTER L 109, // mapped: LATIN CAPITAL LETTER M -> LATIN SMALL LETTER M 110, // mapped: LATIN CAPITAL LETTER N -> LATIN SMALL LETTER N 111, // mapped: LATIN CAPITAL LETTER O -> LATIN SMALL LETTER O 112, // mapped: LATIN CAPITAL LETTER P -> LATIN SMALL LETTER P 113, // mapped: LATIN CAPITAL LETTER Q -> LATIN SMALL LETTER Q 114, // mapped: LATIN CAPITAL LETTER R -> LATIN SMALL LETTER R 115, // mapped: LATIN CAPITAL LETTER S -> LATIN SMALL LETTER S 116, // mapped: LATIN CAPITAL LETTER T -> LATIN SMALL LETTER T 117, // mapped: LATIN CAPITAL LETTER U -> LATIN SMALL LETTER U 118, // mapped: LATIN CAPITAL LETTER V -> LATIN SMALL LETTER V 119, // mapped: LATIN CAPITAL LETTER W -> LATIN SMALL LETTER W 120, // mapped: LATIN CAPITAL LETTER X -> LATIN SMALL LETTER X 121, // mapped: LATIN CAPITAL LETTER Y -> LATIN SMALL LETTER Y 122, // mapped: LATIN CAPITAL LETTER Z -> LATIN SMALL LETTER Z 0, // discarded: LEFT SQUARE BRACKET 0, // discarded: REVERSE SOLIDUS 0, // discarded: RIGHT SQUARE BRACKET 0, // discarded: CIRCUMFLEX ACCENT 95, // untouched: LOW LINE 0, // discarded: GRAVE ACCENT 97, // untouched: LATIN SMALL LETTER A 98, // untouched: LATIN SMALL LETTER B 99, // untouched: LATIN SMALL LETTER C 100, // untouched: LATIN SMALL LETTER D 101, // untouched: LATIN SMALL LETTER E 102, // untouched: LATIN SMALL LETTER F 103, // untouched: LATIN SMALL LETTER G 104, // untouched: LATIN SMALL LETTER H 105, // untouched: LATIN SMALL LETTER I 106, // untouched: LATIN SMALL LETTER J 107, // untouched: LATIN SMALL LETTER K 108, // untouched: LATIN SMALL LETTER L 109, // untouched: LATIN SMALL LETTER M 110, // untouched: LATIN SMALL LETTER N 111, // untouched: LATIN SMALL LETTER O 112, // untouched: LATIN SMALL LETTER P 113, // untouched: LATIN SMALL LETTER Q 114, // untouched: LATIN SMALL LETTER R 115, // untouched: LATIN SMALL LETTER S 116, // untouched: LATIN SMALL LETTER T 117, // untouched: LATIN SMALL LETTER U 118, // untouched: LATIN SMALL LETTER V 119, // untouched: LATIN SMALL LETTER W 120, // untouched: LATIN SMALL LETTER X 121, // untouched: LATIN SMALL LETTER Y 122, // untouched: LATIN SMALL LETTER Z 0, // discarded: LEFT CURLY BRACKET 0, // discarded: VERTICAL LINE 0, // discarded: RIGHT CURLY BRACKET 0, // discarded: TILDE 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: <control> 0, // discarded: NO-BREAK SPACE 0, // discarded: INVERTED EXCLAMATION MARK 0, // discarded: CENT SIGN 0, // discarded: POUND SIGN 0, // discarded: CURRENCY SIGN 0, // discarded: YEN SIGN 0, // discarded: BROKEN BAR 0, // discarded: SECTION SIGN 0, // discarded: DIAERESIS 0, // discarded: COPYRIGHT SIGN 97, // mapped: FEMININE ORDINAL INDICATOR -> LATIN SMALL LETTER A 0, // discarded: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0, // discarded: NOT SIGN 0, // discarded: SOFT HYPHEN 0, // discarded: REGISTERED SIGN 0, // discarded: MACRON 0, // discarded: DEGREE SIGN 0, // discarded: PLUS-MINUS SIGN 0, // discarded: SUPERSCRIPT TWO 0, // discarded: SUPERSCRIPT THREE 0, // discarded: ACUTE ACCENT 0, // discarded: MICRO SIGN 0, // discarded: PILCROW SIGN 0, // discarded: MIDDLE DOT 0, // discarded: CEDILLA 0, // discarded: SUPERSCRIPT ONE 111, // mapped: MASCULINE ORDINAL INDICATOR -> LATIN SMALL LETTER O 0, // discarded: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0, // discarded: VULGAR FRACTION ONE QUARTER 0, // discarded: VULGAR FRACTION ONE HALF 0, // discarded: VULGAR FRACTION THREE QUARTERS 0, // discarded: INVERTED QUESTION MARK 97, // mapped: LATIN CAPITAL LETTER A WITH GRAVE -> LATIN SMALL LETTER A 97, // mapped: LATIN CAPITAL LETTER A WITH ACUTE -> LATIN SMALL LETTER A 97, // mapped: LATIN CAPITAL LETTER A WITH CIRCUMFLEX -> LATIN SMALL LETTER A 97, // mapped: LATIN CAPITAL LETTER A WITH TILDE -> LATIN SMALL LETTER A 97, // mapped: LATIN CAPITAL LETTER A WITH DIAERESIS -> LATIN SMALL LETTER A 97, // mapped: LATIN CAPITAL LETTER A WITH RING ABOVE -> LATIN SMALL LETTER A 101, // mapped: LATIN CAPITAL LETTER AE -> LATIN SMALL LETTER E 99, // mapped: LATIN CAPITAL LETTER C WITH CEDILLA -> LATIN SMALL LETTER C 101, // mapped: LATIN CAPITAL LETTER E WITH GRAVE -> LATIN SMALL LETTER E 101, // mapped: LATIN CAPITAL LETTER E WITH ACUTE -> LATIN SMALL LETTER E 101, // mapped: LATIN CAPITAL LETTER E WITH CIRCUMFLEX -> LATIN SMALL LETTER E 101, // mapped: LATIN CAPITAL LETTER E WITH DIAERESIS -> LATIN SMALL LETTER E 105, // mapped: LATIN CAPITAL LETTER I WITH GRAVE -> LATIN SMALL LETTER I 105, // mapped: LATIN CAPITAL LETTER I WITH ACUTE -> LATIN SMALL LETTER I 105, // mapped: LATIN CAPITAL LETTER I WITH CIRCUMFLEX -> LATIN SMALL LETTER I 105, // mapped: LATIN CAPITAL LETTER I WITH DIAERESIS -> LATIN SMALL LETTER I 0, // discarded: LATIN CAPITAL LETTER ETH 110, // mapped: LATIN CAPITAL LETTER N WITH TILDE -> LATIN SMALL LETTER N 111, // mapped: LATIN CAPITAL LETTER O WITH GRAVE -> LATIN SMALL LETTER O 111, // mapped: LATIN CAPITAL LETTER O WITH ACUTE -> LATIN SMALL LETTER O 111, // mapped: LATIN CAPITAL LETTER O WITH CIRCUMFLEX -> LATIN SMALL LETTER O 111, // mapped: LATIN CAPITAL LETTER O WITH TILDE -> LATIN SMALL LETTER O 111, // mapped: LATIN CAPITAL LETTER O WITH DIAERESIS -> LATIN SMALL LETTER O 0, // discarded: MULTIPLICATION SIGN 111, // mapped: LATIN CAPITAL LETTER O WITH STROKE -> LATIN SMALL LETTER O 117, // mapped: LATIN CAPITAL LETTER U WITH GRAVE -> LATIN SMALL LETTER U 117, // mapped: LATIN CAPITAL LETTER U WITH ACUTE -> LATIN SMALL LETTER U 117, // mapped: LATIN CAPITAL LETTER U WITH CIRCUMFLEX -> LATIN SMALL LETTER U 117, // mapped: LATIN CAPITAL LETTER U WITH DIAERESIS -> LATIN SMALL LETTER U 121, // mapped: LATIN CAPITAL LETTER Y WITH ACUTE -> LATIN SMALL LETTER Y 0, // discarded: LATIN CAPITAL LETTER THORN 0, // discarded: LATIN SMALL LETTER SHARP S 97, // mapped: LATIN SMALL LETTER A WITH GRAVE -> LATIN SMALL LETTER A 97, // mapped: LATIN SMALL LETTER A WITH ACUTE -> LATIN SMALL LETTER A 97, // mapped: LATIN SMALL LETTER A WITH CIRCUMFLEX -> LATIN SMALL LETTER A 97, // mapped: LATIN SMALL LETTER A WITH TILDE -> LATIN SMALL LETTER A 97, // mapped: LATIN SMALL LETTER A WITH DIAERESIS -> LATIN SMALL LETTER A 97, // mapped: LATIN SMALL LETTER A WITH RING ABOVE -> LATIN SMALL LETTER A 101, // mapped: LATIN SMALL LETTER AE -> LATIN SMALL LETTER E 99, // mapped: LATIN SMALL LETTER C WITH CEDILLA -> LATIN SMALL LETTER C 101, // mapped: LATIN SMALL LETTER E WITH GRAVE -> LATIN SMALL LETTER E 101, // mapped: LATIN SMALL LETTER E WITH ACUTE -> LATIN SMALL LETTER E 101, // mapped: LATIN SMALL LETTER E WITH CIRCUMFLEX -> LATIN SMALL LETTER E 101, // mapped: LATIN SMALL LETTER E WITH DIAERESIS -> LATIN SMALL LETTER E 105, // mapped: LATIN SMALL LETTER I WITH GRAVE -> LATIN SMALL LETTER I 105, // mapped: LATIN SMALL LETTER I WITH ACUTE -> LATIN SMALL LETTER I 105, // mapped: LATIN SMALL LETTER I WITH CIRCUMFLEX -> LATIN SMALL LETTER I 105, // mapped: LATIN SMALL LETTER I WITH DIAERESIS -> LATIN SMALL LETTER I 0, // discarded: LATIN SMALL LETTER ETH 110, // mapped: LATIN SMALL LETTER N WITH TILDE -> LATIN SMALL LETTER N 111, // mapped: LATIN SMALL LETTER O WITH GRAVE -> LATIN SMALL LETTER O 111, // mapped: LATIN SMALL LETTER O WITH ACUTE -> LATIN SMALL LETTER O 111, // mapped: LATIN SMALL LETTER O WITH CIRCUMFLEX -> LATIN SMALL LETTER O 111, // mapped: LATIN SMALL LETTER O WITH TILDE -> LATIN SMALL LETTER O 111, // mapped: LATIN SMALL LETTER O WITH DIAERESIS -> LATIN SMALL LETTER O 0, // discarded: DIVISION SIGN 111, // mapped: LATIN SMALL LETTER O WITH STROKE -> LATIN SMALL LETTER O 117, // mapped: LATIN SMALL LETTER U WITH GRAVE -> LATIN SMALL LETTER U 117, // mapped: LATIN SMALL LETTER U WITH ACUTE -> LATIN SMALL LETTER U 117, // mapped: LATIN SMALL LETTER U WITH CIRCUMFLEX -> LATIN SMALL LETTER U 117, // mapped: LATIN SMALL LETTER U WITH DIAERESIS -> LATIN SMALL LETTER U 121, // mapped: LATIN SMALL LETTER Y WITH ACUTE -> LATIN SMALL LETTER Y 0, // discarded: LATIN SMALL LETTER THORN 121, // mapped: LATIN SMALL LETTER Y WITH DIAERESIS -> LATIN SMALL LETTER Y }; }