/** * Copyright (c) 2012-2016 André Bargull * Alle Rechte vorbehalten / All Rights Reserved. Use is subject to license terms. * * <https://github.com/anba/es6draft> */ package com.github.anba.es6draft.runtime.objects.intl; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import com.ibm.icu.util.TimeZone; import com.ibm.icu.util.TimeZone.SystemTimeZoneType; /** * Simple tools to generate the various language data for the intl package */ final class IntlDataTools { private IntlDataTools() { } public static void main(String[] args) throws IOException { // Path cldrMainDir = java.nio.file.Paths.get("/tmp/cldr-2.0.0-core--main"); // oldStyleLanguageTags(cldrMainDir); // Path currencyFile = java.nio.file.Paths.get("/tmp/iso_currency.xml"); // currencyDigits(currencyFile); // Path tzdataDir = java.nio.file.Paths.get("/tmp/tzdata2013c.tar"); // jdkTimezoneNames(tzdataDir); // Path langSubtagReg = java.nio.file.Paths.get("/tmp/language-subtag-registry.txt"); // languageSubtagRegistry(langSubtagReg); } /** * {@link LanguageSubtagRegistryData} * * @param langSubtagReg * the language subtag registry file * @throws IOException * if an I/O error occurs */ static void languageSubtagRegistry(Path langSubtagReg) throws IOException { List<String> lines = Files.readAllLines(langSubtagReg, StandardCharsets.UTF_8); ArrayDeque<String> stack = new ArrayDeque<>(lines); ArrayList<Record> language = new ArrayList<>(); ArrayList<Record> region = new ArrayList<>(); ArrayList<Record> grandfathered = new ArrayList<>(); ArrayList<Record> redundant = new ArrayList<>(); ArrayList<Record> extlang = new ArrayList<>(); ArrayList<Record> script = new ArrayList<>(); ArrayList<Record> variant = new ArrayList<>(); // skip first two lines (file date + %% separator) stack.pop(); stack.pop(); while (!stack.isEmpty()) { Record rec = readRecord(stack); String type = rec.get(Field.Type); assert type != null; if ("language".equals(type)) { if (rec.has(Field.PreferredValue)) { language.add(rec); } } if ("region".equals(type)) { if (rec.has(Field.PreferredValue)) { region.add(rec); } } if ("grandfathered".equals(type)) { grandfathered.add(rec); } if ("redundant".equals(type)) { if (rec.has(Field.PreferredValue)) { redundant.add(rec); } } if ("extlang".equals(type)) { if (rec.has(Field.PreferredValue)) { extlang.add(rec); } } if ("script".equals(type)) { if (rec.has(Field.PreferredValue)) { script.add(rec); } } if ("variant".equals(type)) { if (rec.has(Field.PreferredValue)) { variant.add(rec); } } } /* Generate LanguageSubtagRegistryData#scriptData entries */ System.out.println("--- [LanguageSubtagRegistryData#scriptData] ---"); for (Record record : script) { assert record.has(Field.Prefix); System.out.printf("%s -> %s [%s]%n", record.get(Field.Subtag), record.get(Field.PreferredValue), record.get(Field.Prefix)); } System.out.println(); assert script.isEmpty() : "no preferred values for 'script' expected"; /* Generate LanguageSubtagRegistryData#extlangData entries */ System.out.println("--- [LanguageSubtagRegistryData#extlangData] ---"); for (Record record : extlang) { assert record.has(Field.Prefix); assert record.get(Field.Subtag).equals(record.get(Field.PreferredValue)) : record.get(Field.Subtag); System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Subtag), record.get(Field.Prefix)); } System.out.println(); /* Generate LanguageSubtagRegistryData#variantData entries */ System.out.println("--- [LanguageSubtagRegistryData#variantData] ---"); for (Record record : variant) { assert record.has(Field.Prefix); System.out.printf("%s -> %s [%s]%n", record.get(Field.Subtag), record.get(Field.PreferredValue), record.get(Field.Prefix)); System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Subtag), record.get(Field.PreferredValue)); } System.out.println(); assert variant.size() == 1 : "Only one variant entry expected"; assert variant.get(0).get(Field.Subtag).equals("heploc"); assert variant.get(0).get(Field.PreferredValue).equals("alalc97"); /* Generate LanguageSubtagRegistryData#regionData entries */ System.out.println("--- [LanguageSubtagRegistryData#regionData] ---"); for (Record record : region) { assert !record.has(Field.Prefix); System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Subtag).toLowerCase(Locale.ROOT), record.get(Field.PreferredValue)); } System.out.println(); /* Generate LanguageSubtagRegistryData#languageData entries */ System.out.println("--- [LanguageSubtagRegistryData#languageData] ---"); for (Record record : language) { assert !record.has(Field.Prefix); System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Subtag), record.get(Field.PreferredValue)); } System.out.println(); /* Generate LanguageSubtagRegistryData#grandfatheredData entries */ System.out.println("--- [LanguageSubtagRegistryData#grandfatheredData] ---"); for (Record record : grandfathered) { assert !record.has(Field.Prefix); if (record.has(Field.PreferredValue)) { System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Tag).toLowerCase(Locale.ROOT), record.get(Field.PreferredValue)); } else { System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Tag).toLowerCase(Locale.ROOT), record.get(Field.Tag)); } } System.out.println(); /* Generate LanguageSubtagRegistryData#redundantData entries */ System.out.println("--- [LanguageSubtagRegistryData#redundantData] ---"); for (Record record : redundant) { assert !record.has(Field.Prefix); System.out.printf("map.put(\"%s\", \"%s\");%n", record.get(Field.Tag).toLowerCase(Locale.ROOT), record.get(Field.PreferredValue)); } System.out.println(); } private enum Field { Type("Type"), Tag("Tag"), Subtag("Subtag"), Description("Description"), Added("Added"), Deprecated("Deprecated"), PreferredValue("Preferred-Value"), Prefix("Prefix"), SupressScript("Suppress-Script"), Macrolanguage("Macrolanguage"), Scope("Scope"), Comments("Comments"); private final String name; private Field(String name) { this.name = name; } public String getName() { return name; } static final HashMap<String, Field> byName; static { HashMap<String, Field> map = new HashMap<>(); for (Field field : Field.values()) { map.put(field.getName(), field); } byName = map; } public static Field forName(String name) { return byName.get(name); } } private static final class Record { EnumMap<Field, String> entries = new EnumMap<>(Field.class); boolean has(Field field) { return entries.containsKey(field); } String get(Field field) { return entries.get(field); } } private static Record readRecord(ArrayDeque<String> stack) { Record rec = new Record(); for (;;) { if (stack.isEmpty()) { return rec; } String line = stack.pop(); assert !line.isEmpty(); if ("%%".equals(line)) { return rec; } if (line.charAt(0) == ' ') { // continuation continue; } int sep = line.indexOf(':'); String name = line.substring(0, sep).trim(); String value = line.substring(sep + 1).trim(); Field field = Field.forName(name); assert field != null; switch (field) { case Deprecated: case PreferredValue: case Prefix: case Subtag: case Tag: case Type: rec.entries.put(field, value); break; case Added: case Comments: case Description: case Macrolanguage: case Scope: case SupressScript: default: // ignore these break; } } } /** * {@link IntlAbstractOperations#JDK_TIMEZONE_NAMES} * * @param tzdataDir * the tzdata directory * @throws IOException * if an I/O error occurs */ static void jdkTimezoneNames(Path tzdataDir) throws IOException { Pattern pZone = Pattern.compile("Zone\\s+([a-zA-Z0-9_+\\-/]+)\\s+.*"); Pattern pLink = Pattern.compile("Link\\s+([a-zA-Z0-9_+\\-/]+)\\s+([a-zA-Z0-9_+\\-/]+)(?:\\s+#.*)?"); Pattern pFileName = Pattern.compile("[a-z0-9]+"); HashSet<String> ignoreFiles = new HashSet<>(Arrays.asList("backzone")); TreeSet<String> names = new TreeSet<>(); TreeMap<String, String> links = new TreeMap<>(); try (DirectoryStream<Path> stream = Files.newDirectoryStream(tzdataDir)) { for (Path path : stream) { String filename = Objects.requireNonNull(path.getFileName()).toString(); if (pFileName.matcher(filename).matches() && !ignoreFiles.contains(filename)) { try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) { for (String line; (line = reader.readLine()) != null;) { if (line.startsWith("Zone")) { Matcher m = pZone.matcher(line); if (!m.matches()) { System.out.println(line); } String name = m.group(1); boolean changed = names.add(name); assert changed : line; } else if (line.startsWith("Link")) { Matcher m = pLink.matcher(line); if (!m.matches()) { System.out.println(line); } String target = m.group(1); String source = m.group(2); boolean changed = links.put(source, target) == null; assert changed : String.format("%s: %s", filename, line); } } } } } } TreeSet<String> allnames = new TreeSet<>(); allnames.addAll(names); for (Map.Entry<String, String> link : links.entrySet()) { assert allnames.contains(link.getValue()); boolean changed = allnames.add(link.getKey()); assert changed : link; } TreeSet<String> ids = new TreeSet<>(TimeZone.getAvailableIDs(SystemTimeZoneType.ANY, null, null)); for (String id : new HashSet<>(ids)) { if (id.startsWith("SystemV/")) { ids.remove(id); } } System.out.println(allnames); System.out.println(ids.size()); System.out.println(allnames.size()); TreeSet<String> jdkTimeZones = new TreeSet<>(ids); jdkTimeZones.removeAll(allnames); for (String name : jdkTimeZones) { System.out.printf("\"%s\",", name); } } /** * {@link NumberFormatConstructor#CurrencyDigits(String)} * * @param currencyFile * the currency xml-file * @throws IOException * if an I/O error occurs */ static void currencyDigits(Path currencyFile) throws IOException { try (Reader reader = Files.newBufferedReader(currencyFile, StandardCharsets.UTF_8)) { LinkedHashMap<String, Integer> map = new LinkedHashMap<>(); Document xml = xml(reader); NodeList list = xml.getDocumentElement().getElementsByTagName("CcyNtry"); for (int i = 0, len = list.getLength(); i < len; ++i) { Element item = (Element) list.item(i); Element code = getElementByTagName(item, "Ccy"); Element minor = getElementByTagName(item, "CcyMnrUnts"); if (code == null) { continue; } String scode = code.getTextContent(); int iminor = 2; try { iminor = Integer.parseInt(minor.getTextContent()); } catch (NumberFormatException e) { } if (map.containsKey(scode) && map.get(scode) != iminor) { System.err.println(scode); } if (iminor != 2 && !map.containsKey(scode)) { map.put(scode, iminor); } } TreeMap<Integer, List<String>> sorted = new TreeMap<>(); for (Map.Entry<String, Integer> entry : map.entrySet()) { List<String> currencies = sorted.get(entry.getValue()); if (currencies == null) { currencies = new ArrayList<>(); } currencies.add(entry.getKey()); sorted.put(entry.getValue(), currencies); } for (Map.Entry<Integer, List<String>> entry : sorted.entrySet()) { Collections.sort(entry.getValue()); for (String c : entry.getValue()) { System.out.printf("case \"%s\":%n", c); } System.out.printf(" return %d;%n", entry.getKey()); } System.out.println("default:\n return 2;"); } } /** * {@link IntlAbstractOperations#oldStyleLanguageTags} * * @param cldrMainDir * the CLDR main directory * @throws IOException * if an I/O error occurs */ static void oldStyleLanguageTags(Path cldrMainDir) throws IOException { try (DirectoryStream<Path> newDirectoryStream = Files.newDirectoryStream(cldrMainDir)) { Map<String, String> names = new LinkedHashMap<>(); Map<String, String> aliased = new LinkedHashMap<>(); for (Path path : newDirectoryStream) { try (Reader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) { Document xml = xml(reader); Element identity = getElementByTagName(xml.getDocumentElement(), "identity"); assert identity != null; Element language = getElementByTagName(xml.getDocumentElement(), "language"); Element script = getElementByTagName(xml.getDocumentElement(), "script"); Element territory = getElementByTagName(xml.getDocumentElement(), "territory"); String tag = language.getAttribute("type"); if (script != null) { tag += "-" + script.getAttribute("type"); } if (territory != null) { tag += "-" + territory.getAttribute("type"); } String filename = Objects.requireNonNull(path.getFileName()).toString(); filename = filename.substring(0, filename.lastIndexOf('.')); names.put(filename, tag); Element alias = getElementByTagName(xml.getDocumentElement(), "alias"); if (alias != null && script == null && territory != null) { aliased.put(tag, alias.getAttribute("source")); } } } Map<String, String> result = new LinkedHashMap<>(); for (Map.Entry<String, String> entry : aliased.entrySet()) { String from = entry.getKey(); String to = names.get(entry.getValue()); String value = result.get(to); if (value == null) { value = ""; } else { value += ", "; } value += "\"" + from + "\""; result.put(to, value); } for (Map.Entry<String, String> entry : result.entrySet()) { System.out.printf("map.put(\"%s\", new String[]{%s});%n", entry.getKey(), entry.getValue()); } } } private static Element getElementByTagName(Element element, String tagName) { return (Element) element.getElementsByTagName(tagName).item(0); } private static Document xml(Reader xml) throws IOException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); // turn off any validation or namespace features factory.setNamespaceAware(false); factory.setValidating(false); List<String> features = Arrays.asList("http://xml.org/sax/features/namespaces", "http://xml.org/sax/features/validation", "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", "http://apache.org/xml/features/nonvalidating/load-external-dtd"); for (String feature : features) { try { factory.setFeature(feature, false); } catch (ParserConfigurationException e) { // ignore invalid feature names } } try { DocumentBuilder builder = factory.newDocumentBuilder(); InputSource source = new InputSource(xml); return builder.parse(source); } catch (ParserConfigurationException | SAXException e) { throw new IOException(e); } } }