/* * Copyright (c) 2008-2009 Mozilla Foundation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package nu.validator.htmlparser.generator; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Map; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; public class GenerateNamedCharacters { private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); private static final Pattern LINE_PATTERN = Pattern.compile("<td> <code title=\"\">([^<]*)</code> </td> <td> U\\+(\\S*) (?:U\\+(\\S*) )?</td>"); private static String toUString(int c) { String hexString = Integer.toHexString(c); switch (hexString.length()) { case 1: return "\\u000" + hexString; case 2: return "\\u00" + hexString; case 3: return "\\u0" + hexString; case 4: return "\\u" + hexString; default: throw new RuntimeException("Unreachable."); } } private static int charToIndex(char c) { if (c >= 'a' && c <= 'z') { return c - 'a' + 26; } else if (c >= 'A' && c <= 'Z') { return c - 'A'; } throw new IllegalArgumentException("Bad char in named character name: " + c); } private static boolean allZero(int[] arr) { for (int i = 0; i < arr.length; i++) { if (arr[i] != 0) { return false; } } return true; } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { TreeMap<String, String> entities = new TreeMap<String, String>(); BufferedReader reader = new BufferedReader(new InputStreamReader( System.in, "utf-8")); String line; while ((line = reader.readLine()) != null) { Matcher m = LINE_PATTERN.matcher(line); while (m.find()) { String value; if (m.group(3) != null) { // two BMP chars int firstIntVal = Integer.parseInt(m.group(2), 16); int secondIntVal = Integer.parseInt(m.group(3), 16); value = ("" + (char)firstIntVal) + (char)secondIntVal; } else { // one code point int intVal = Integer.parseInt(m.group(2), 16); if (intVal <= 0xFFFF) { value = "" + (char)intVal; } else { int high = (LEAD_OFFSET + (intVal >> 10)); int low = (0xDC00 + (intVal & 0x3FF)); value = ("" + (char)high) + (char)low; } } entities.put(m.group(1), value); } } // Java initializes arrays to zero. Zero is our magic value for no hilo // value. int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; String firstName = entities.entrySet().iterator().next().getKey(); int firstKey = charToIndex(firstName.charAt(0)); int secondKey = firstName.charAt(1); int row = 0; int lo = 0; System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n"); for (Map.Entry<String, String> entity : entities.entrySet()) { String name = entity.getKey(); int newFirst = charToIndex(name.charAt(0)); int newSecond = name.charAt(1); assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; if (firstKey != newFirst || secondKey != newSecond) { hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; lo = row; firstKey = newFirst; secondKey = newSecond; } System.out.print("\""); System.out.print(name.substring(2)); System.out.print("\",\n"); row++; } System.out.print("};\n"); hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; System.out.print("static final @NoLength char[][] VALUES = {\n"); for (Map.Entry<String, String> entity : entities.entrySet()) { String value = entity.getValue(); System.out.print("{"); if (value.length() == 1) { char c = value.charAt(0); if (c == '\'') { System.out.print("\'\\\'\'"); } else if (c == '\n') { System.out.print("\'\\n\'"); } else if (c == '\\') { System.out.print("\'\\\\\'"); } else if (c <= 0xFFFF) { System.out.print("\'"); System.out.print(toUString(c)); System.out.print("\'"); } } else { System.out.print("\'"); System.out.print(toUString(value.charAt(0))); System.out.print("\', \'"); System.out.print(toUString(value.charAt(1))); System.out.print("\'"); } System.out.print("},\n"); } System.out.print("};\n"); System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n"); for (int i = 0; i < hiLoTable.length; i++) { if (allZero(hiLoTable[i])) { System.out.print("null,\n"); } else { System.out.print("{"); for (int j = 0; j < hiLoTable[i].length; j++) { System.out.print(hiLoTable[i][j]); System.out.print(", "); } System.out.print("},\n"); } } System.out.print("};\n"); } }