/*
 * Copyright (c) 2006 Henri Sivonen
 * Copyright (c) 2007-2010 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package org.whattf.datatype.data;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

/**
 * Loads the language subtag registry from the classpath and exposes its
 * contents as sorted arrays and lookup tables.
 *
 * <p>Every registry line is lower-cased before it is examined, so all subtags,
 * tags and preferred values held by this class are lower case. All strings
 * stored in the tables are interned.
 *
 * <p>The getters return the internal arrays and map directly, without
 * copying.
 */
public class LanguageData {

    /** Classpath location of the registry file. */
    private static final String REGISTRY_RESOURCE = "nu/validator/localentities/files/language-subtag-registry";

    /** Line that separates two records in the registry file. */
    private static final String RECORD_SEPARATOR = "%%";

    private static final Pattern HYPHEN = Pattern.compile("-");

    private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = {};

    private static final String[] EMPTY_STRING_ARRAY = {};

    // Field names as they appear once a line has been lower-cased.
    private static final String PREFIX = "prefix: ";

    private static final String SUPPRESS_SCRIPT = "suppress-script: ";

    private static final String SUBTAG = "subtag: ";

    private static final String TAG = "tag: ";

    private static final String TYPE = "type: ";

    private static final String DEPRECATED = "deprecated: ";

    private static final String PREFERRED_VALUE = "preferred-value: ";

    // Accumulators filled while the registry is read. The sorted sets give
    // the arrays built in prepareArrays() their natural String ordering,
    // which the binary searches there depend on.
    private final SortedSet<String> languageSet = new TreeSet<String>();

    private final SortedSet<String> extlangSet = new TreeSet<String>();

    private final SortedSet<String> scriptSet = new TreeSet<String>();

    private final SortedSet<String> regionSet = new TreeSet<String>();

    private final SortedSet<String> variantSet = new TreeSet<String>();

    private final SortedSet<String> grandfatheredSet = new TreeSet<String>();

    private final SortedSet<String> redundantSet = new TreeSet<String>();

    private final SortedSet<String> deprecatedLangSet = new TreeSet<String>();

    private final SortedSet<String> deprecatedSet = new TreeSet<String>();

    private final Map<String, String> suppressedScriptByLanguageMap = new HashMap<String, String>();

    private final Map<String, String> prefixByExtlangMap = new HashMap<String, String>();

    private final Map<String, String> preferredValueByLanguageMap = new HashMap<String, String>();

    private final Map<String, Set<String[]>> prefixesByVariantMap = new HashMap<String, Set<String[]>>();

    // Final tables, populated by prepareArrays() once the registry is read.
    private String[] languages = null;

    private String[] extlangs = null;

    private String[] scripts = null;

    private String[] regions = null;

    private String[] variants = null;

    private String[] grandfathered = null;

    private String[] redundant = null;

    private String[] deprecatedLang = null;

    private String[] deprecated = null;

    private int[] suppressedScriptByLanguage = null;

    private int[] prefixByExtlang = null;

    private String[][][] prefixesByVariant = null;

    /**
     * Reads the registry resource from the classpath (decoded as UTF-8) and
     * builds all lookup tables.
     *
     * @throws IOException
     *             if the resource cannot be found or read, or if the registry
     *             refers to a script or extlang prefix that it does not
     *             define
     */
    public LanguageData() throws IOException {
        InputStream stream = LanguageData.class.getClassLoader().getResourceAsStream(
                REGISTRY_RESOURCE);
        if (stream == null) {
            // Fail with a diagnosable error instead of a NullPointerException
            // from the InputStreamReader constructor.
            throw new IOException(
                    "Language subtag registry resource not found: "
                            + REGISTRY_RESOURCE);
        }
        try {
            consumeRegistry(new BufferedReader(new InputStreamReader(stream,
                    "UTF-8")));
        } finally {
            // Closing the underlying stream releases the resource even when
            // parsing fails part-way through.
            stream.close();
        }
        prepareArrays();
    }

    /**
     * Reads records from the registry until the end of input is reached.
     *
     * @param in
     *            the reader positioned at the start of the registry
     * @throws IOException
     *             if reading fails
     */
    private void consumeRegistry(BufferedReader in) throws IOException {
        while (consumeRecord(in)) {
            // spin
        }
    }

    /**
     * Converts the accumulated sets and maps into the arrays returned by the
     * getters.
     *
     * @throws IOException
     *             if a language's suppressed script or an extlang's prefix is
     *             not itself present in the registry
     */
    private void prepareArrays() throws IOException {
        scripts = scriptSet.toArray(EMPTY_STRING_ARRAY);
        regions = regionSet.toArray(EMPTY_STRING_ARRAY);
        grandfathered = grandfatheredSet.toArray(EMPTY_STRING_ARRAY);
        redundant = redundantSet.toArray(EMPTY_STRING_ARRAY);
        deprecated = deprecatedSet.toArray(EMPTY_STRING_ARRAY);
        deprecatedLang = deprecatedLangSet.toArray(EMPTY_STRING_ARRAY);

        // languages[i] pairs with suppressedScriptByLanguage[i], which is an
        // index into scripts, or -1 when no script is suppressed.
        int i = 0;
        languages = new String[languageSet.size()];
        suppressedScriptByLanguage = new int[languageSet.size()];
        for (String language : languageSet) {
            languages[i] = language;
            String suppressed = suppressedScriptByLanguageMap.get(language);
            if (suppressed == null) {
                suppressedScriptByLanguage[i] = -1;
            } else {
                int index = Arrays.binarySearch(scripts, suppressed);
                if (index < 0) {
                    throw new IOException(
                            "Malformed registry: reference to non-existent script.");
                }
                suppressedScriptByLanguage[i] = index;
            }
            i++;
        }

        // extlangs[i] pairs with prefixByExtlang[i], which is an index into
        // languages, or -1 when the extlang has no prefix.
        i = 0;
        extlangs = new String[extlangSet.size()];
        prefixByExtlang = new int[extlangSet.size()];
        for (String extlang : extlangSet) {
            extlangs[i] = extlang;
            String prefix = prefixByExtlangMap.get(extlang);
            if (prefix == null) {
                prefixByExtlang[i] = -1;
            } else {
                int index = Arrays.binarySearch(languages, prefix);
                if (index < 0) {
                    throw new IOException(
                            "Malformed registry: reference to non-existent prefix for extlang.");
                }
                prefixByExtlang[i] = index;
            }
            i++;
        }

        // variants[i] pairs with prefixesByVariant[i]; each prefix is the
        // array of its hyphen-separated subtags.
        i = 0;
        variants = new String[variantSet.size()];
        prefixesByVariant = new String[variantSet.size()][][];
        for (String variant : variantSet) {
            variants[i] = variant;
            Set<String[]> prefixes = prefixesByVariantMap.get(variant);
            if (prefixes != null) {
                prefixesByVariant[i] = prefixes.toArray(EMPTY_DOUBLE_STRING_ARRAY);
            } else {
                prefixesByVariant[i] = EMPTY_DOUBLE_STRING_ARRAY;
            }
            i++;
        }
    }

    /**
     * Reads one record (the lines up to the next {@code %%} separator or the
     * end of input) and files its contents into the accumulator sets and
     * maps. Records without a {@code Subtag} or {@code Tag} field are
     * ignored.
     *
     * @param in
     *            the reader to take lines from
     * @return <code>true</code> if the record ended at a separator,
     *         <code>false</code> if the end of input was reached
     * @throws IOException
     *             if reading fails
     */
    private boolean consumeRecord(BufferedReader in) throws IOException {
        boolean hasMore = true;
        String type = null;
        String subtag = null;
        String suppressScript = null;
        String preferredValue = null;
        // Arrays hash by identity, so this set never merges equal prefixes;
        // it simply collects every Prefix line of the record.
        Set<String[]> prefixes = new HashSet<String[]>();
        String singlePrefix = null;
        boolean depr = false;

        for (;;) {
            String line = in.readLine();
            if (line == null) {
                hasMore = false;
                break;
            }
            // Locale.ROOT keeps the mapping locale-independent; with a
            // Turkish default locale "I" would otherwise become a dotless i.
            line = line.toLowerCase(Locale.ROOT);
            if (RECORD_SEPARATOR.equals(line)) {
                break;
            } else if (line.startsWith(TYPE)) {
                type = line.substring(TYPE.length()).trim().intern();
            } else if (line.startsWith(SUBTAG)) {
                subtag = line.substring(SUBTAG.length()).trim().intern();
            } else if (line.startsWith(TAG)) {
                subtag = line.substring(TAG.length()).trim().intern();
            } else if (line.startsWith(SUPPRESS_SCRIPT)) {
                suppressScript = line.substring(SUPPRESS_SCRIPT.length()).trim().intern();
            } else if (line.startsWith(PREFIX)) {
                String[] prefixSubtags = HYPHEN.split(line.substring(
                        PREFIX.length()).trim());
                for (int i = 0; i < prefixSubtags.length; i++) {
                    prefixSubtags[i] = prefixSubtags[i].intern();
                }
                prefixes.add(prefixSubtags);
                // Only the first subtag of the most recent Prefix line is
                // kept for extlang records.
                singlePrefix = prefixSubtags[0];
            } else if (line.startsWith(DEPRECATED)) {
                depr = true;
            } else if (line.startsWith(PREFERRED_VALUE)) {
                preferredValue = line.substring(PREFERRED_VALUE.length()).trim().intern();
            }
        }

        if (subtag == null) {
            return hasMore;
        }

        // Recorded only once the whole record is known, so the mapping does
        // not depend on Subtag/Tag preceding Preferred-Value in the record.
        if (preferredValue != null) {
            preferredValueByLanguageMap.put(subtag, preferredValue);
        }

        if (depr) {
            if ("language".equals(type)) {
                deprecatedLangSet.add(subtag);
            } else {
                deprecatedSet.add(subtag);
            }
        }

        if ("language".equals(type)) {
            languageSet.add(subtag);
            suppressedScriptByLanguageMap.put(subtag, suppressScript);
        } else if ("extlang".equals(type)) {
            extlangSet.add(subtag);
            prefixByExtlangMap.put(subtag, singlePrefix);
        } else if ("region".equals(type)) {
            regionSet.add(subtag);
        } else if ("script".equals(type)) {
            scriptSet.add(subtag);
        } else if ("variant".equals(type)) {
            variantSet.add(subtag);
            prefixesByVariantMap.put(subtag, prefixes);
        } else if ("grandfathered".equals(type)) {
            grandfatheredSet.add(subtag);
        } else if ("redundant".equals(type)) {
            redundantSet.add(subtag);
        }
        return hasMore;
    }

    /**
     * Returns the language subtags in sorted order.
     *
     * @return the languages (the internal array, not a copy)
     */
    public String[] getLanguages() {
        return languages;
    }

    /**
     * Returns the extlang subtags in sorted order.
     *
     * @return the extlangs (the internal array, not a copy)
     */
    public String[] getExtlangs() {
        return extlangs;
    }

    /**
     * Returns the prefixes of each variant. Element <code>i</code> belongs to
     * element <code>i</code> of {@link #getVariants()}; each prefix is an
     * array of its hyphen-separated subtags.
     *
     * @return the prefixesByVariant (the internal array, not a copy)
     */
    public String[][][] getPrefixesByVariant() {
        return prefixesByVariant;
    }

    /**
     * Returns the prefix of each extlang. Element <code>i</code> belongs to
     * element <code>i</code> of {@link #getExtlangs()} and is an index into
     * {@link #getLanguages()}, or <code>-1</code> if the extlang has no
     * prefix.
     *
     * @return the prefixByExtlang (the internal array, not a copy)
     */
    public int[] getPrefixByExtlang() {
        return prefixByExtlang;
    }

    /**
     * Returns the region subtags in sorted order.
     *
     * @return the regions (the internal array, not a copy)
     */
    public String[] getRegions() {
        return regions;
    }

    /**
     * Returns the script subtags in sorted order.
     *
     * @return the scripts (the internal array, not a copy)
     */
    public String[] getScripts() {
        return scripts;
    }

    /**
     * Returns the suppressed script of each language. Element <code>i</code>
     * belongs to element <code>i</code> of {@link #getLanguages()} and is an
     * index into {@link #getScripts()}, or <code>-1</code> if the language
     * has no suppressed script.
     *
     * @return the suppressedScriptByLanguage (the internal array, not a copy)
     */
    public int[] getSuppressedScriptByLanguage() {
        return suppressedScriptByLanguage;
    }

    /**
     * Returns the variant subtags in sorted order.
     *
     * @return the variants (the internal array, not a copy)
     */
    public String[] getVariants() {
        return variants;
    }

    /**
     * Returns, in sorted order, the subtags and tags of deprecated records
     * whose type is not <code>language</code>.
     *
     * @return the deprecated (the internal array, not a copy)
     */
    public String[] getDeprecated() {
        return deprecated;
    }

    /**
     * Returns the map from subtag or tag to its preferred value. Despite the
     * name, an entry is recorded for every record that carries a
     * <code>Preferred-Value</code> field, whatever its type.
     *
     * @return the preferredValueByLanguageMap (the internal map, not a copy)
     */
    public Map<String, String> getPreferredValueByLanguageMap() {
        return preferredValueByLanguageMap;
    }

    /**
     * Returns the grandfathered tags in sorted order.
     *
     * @return the grandfathered (the internal array, not a copy)
     */
    public String[] getGrandfathered() {
        return grandfathered;
    }

    /**
     * Returns the redundant tags in sorted order.
     *
     * @return the redundant (the internal array, not a copy)
     */
    public String[] getRedundant() {
        return redundant;
    }

    /**
     * Returns the deprecated language subtags in sorted order.
     *
     * @return the deprecatedLang (the internal array, not a copy)
     */
    public String[] getDeprecatedLang() {
        return deprecatedLang;
    }
}