/* * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /** * @test * @bug 8008738 * @summary checks that the mapping implemented by * com.sun.org.apache.xml.internal.serializer.Encodings * correctly identifies valid Charset names and * correctly maps them to their preferred mime names. * Also checks that the Encodings.properties resource file * is consistent. 
* @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java
 * @run main CheckEncodingPropertiesFile
 * @author Daniel Fuchs
 */

import com.sun.org.apache.xml.internal.serializer.EncodingInfo;
import com.sun.org.apache.xml.internal.serializer.Encodings;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;

public class CheckEncodingPropertiesFile {

    private static final String ENCODINGS_FILE =
            "com/sun/org/apache/xml/internal/serializer/Encodings.properties";

    /**
     * Loads the Encodings.properties resource from the system class loader
     * and runs the consistency checks on it.
     *
     * @param args ignored.
     * @throws Exception if the resource cannot be read or a reflective call fails.
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Properties.load(InputStream) decodes as ISO 8859-1, independently of
        // the platform default charset.
        try (InputStream is = ClassLoader.getSystemResourceAsStream(ENCODINGS_FILE)) {
            if (is == null) {
                throw new Error("Resource not found: " + ENCODINGS_FILE);
            }
            props.load(is);
        }
        // Uncomment to help debugging:
        //printAllCharsets();
        test(props);
    }

    /**
     * Upper-cases a name for use as a map key.
     * Locale.ROOT keeps the keys independent of the default locale (with a
     * Turkish default locale, 'i' would otherwise become a dotted capital I).
     */
    private static String upper(String s) {
        return s.toUpperCase(Locale.ROOT);
    }

    private static final class CheckCharsetMapping {

        /**
         * A map that maps Java or XML name to canonical charset names.
         * key:    upper cased value of Java or XML name.
         * value:  case-sensitive canonical name of charset.
         */
        private final Map<String, String> charsetMap = new HashMap<>();

        /**
         * Preferred mime name of each resolved charset.
         * key:    upper cased charset name, as returned by findCharsetNameFor.
         * value:  first XML name listed for that charset.
         */
        private final Map<String, String> preferredMime = new HashMap<>();

        /**
         * Unresolved alias names.
         * For a given set of names pointing to the same unresolved charset,
         * this map will contain, for each alias in the set, a mapping
         * with the upper cased alias as key and the set of known aliases
         * as value.
         */
        private final Map<String, Collection<String>> unresolved = new HashMap<>();

        public final static class ConflictingCharsetError extends Error {
            ConflictingCharsetError(String a, String cs1, String cs2) {
                super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'");
            }
        }

        public final static class MissingValidCharsetNameError extends Error {
            MissingValidCharsetNameError(String name, Collection<String> aliases) {
                super(name+": Line "+aliases+" has no recognized charset alias");
            }
        }

        public final static class ConflictingPreferredMimeNameError extends Error {
            ConflictingPreferredMimeNameError(String a, String cs1, String cs2) {
                super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'");
            }
        }

        /**
         * For each alias in aliases, attempt to find the canonical
         * charset name.
         * All names in aliases are supposed to point to the same charset.
         * Names in aliases can be java names or XML names, indifferently.
         * Every alias that the Charset API recognizes is recorded in
         * charsetMap as a side effect.
         *
         * @param aliases list of names (aliases) for a given charset.
         * @return The name to use for the charset, if found, null otherwise.
         *         This is aliases[0] when it is the first alias that resolves,
         *         otherwise a name already known for the charset (cached or
         *         returned by the Charset API).
         * @throws ConflictingCharsetError if two aliases resolve to
         *         different charsets.
         */
        private String findCharsetNameFor(String[] aliases) {
            String cs = null;
            String res = null;
            for (String a : aliases) {
                final String k = upper(a);
                final String cachedCs = charsetMap.get(k);
                if (cs == null) {
                    cs = cachedCs;
                }
                if (cachedCs != null && cs != null
                        && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) {
                    throw new ConflictingCharsetError(a, cs, cachedCs);
                }
                try {
                    final String rcs = Charset.forName(a).name();
                    if (cs != null && !Charset.forName(cs).name().equals(rcs)) {
                        throw new ConflictingCharsetError(a, cs, rcs);
                    }
                    if (res == null) {
                        // Prefer the first alias of the line when it is the one
                        // that resolves; otherwise fall back on what is known
                        // so far (possibly still null).
                        res = a.equals(aliases[0]) ? a : cs;
                    }
                    cs = rcs;
                    charsetMap.put(k, res == null ? cs : res);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: this alias does not
                    // resolve on this platform, try the next one.
                }
            }
            return res == null ? cs : res;
        }

        /**
         * Register a canonical charset name for a given set of aliases.
         *
         * @param charsetName the canonical charset name.
         * @param aliases a list of aliases for the given charset.
         * @throws NullPointerException if charsetName is null.
         * @throws ConflictingCharsetError if an alias is already mapped to
         *         a different charset name.
         * @throws MissingValidCharsetNameError if an alias was previously
         *         registered as unresolved, which means that an earlier line
         *         naming it had no recognized charset alias.
         */
        private void registerCharsetNameFor(String charsetName, String[] aliases) {
            Objects.requireNonNull(charsetName, "charsetName");

            for (String a : aliases) {
                final String k = upper(a);
                String csv = charsetMap.get(k);
                if (csv == null) {
                    charsetMap.put(k, charsetName);
                    csv = charsetName;
                } else if (!csv.equals(charsetName)) {
                    throw new ConflictingCharsetError(a, charsetName, csv);
                }

                final Collection<String> c = unresolved.get(k);
                if (c != null) {
                    // Resolve all the names of the previously unresolved set
                    // before reporting the offending line.
                    for (String aa : c) {
                        final String ak = upper(aa);
                        if (charsetMap.get(ak) == null) {
                            charsetMap.put(ak, csv);
                        }
                        unresolved.remove(ak);
                    }
                    throw new MissingValidCharsetNameError(charsetName, c);
                }
            }
        }

        /**
         * Register a set of aliases as being unresolved.
         * If one of the names already belongs to a previously registered
         * unresolved set, the two sets are merged into nameSet.
         *
         * @param names the list of names - this should be what is returned by
         *              nameSet.toArray(new String[nameSet.size()])
         * @param nameSet the set of unresolved aliases.
         */
        private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) {
            // This is not necessarily an error: it could happen that some
            //    charsets are simply not supported on some OS/Arch
            System.err.println("Warning: unresolved charset names: '"+ nameSet
                    + "' This is not necessarily an error "
                    + "- this charset may not be supported on this platform.");
            for (String a : names) {
                final String k = upper(a);
                final Collection<String> c = unresolved.get(k);
                if (c != null) {
                    nameSet.addAll(c);
                    for (String aa : c) {
                        unresolved.put(upper(aa), nameSet);
                    }
                }
                unresolved.put(k, nameSet);
            }
        }

        /**
         * Add a new charset name mapping
         *
         * @param javaName the (supposedly) java name of the charset.
         * @param xmlNames a list of corresponding XML names for that charset.
         * @throws ConflictingPreferredMimeNameError if the charset already has
         *         a different preferred mime name (first XML name).
         */
        void addMapping(String javaName, Collection<String> xmlNames) {
            final LinkedHashSet<String> aliasNames = new LinkedHashSet<>();
            aliasNames.add(javaName);
            aliasNames.addAll(xmlNames);
            final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]);
            final String cs = findCharsetNameFor(aliases);
            if (cs == null) {
                registerUnresolvedNamesFor(aliases, aliasNames);
                return;
            }

            registerCharsetNameFor(cs, aliases);
            if (!xmlNames.isEmpty()) {
                final String csKey = upper(cs);
                final String preferred = xmlNames.iterator().next();
                final String cachedPreferred = preferredMime.get(csKey);
                if (cachedPreferred != null && !cachedPreferred.equals(preferred)) {
                    throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred);
                }
                preferredMime.put(csKey, preferred);
            }
        }

        /**
         * Returns the canonical name of the charset for the given Java or XML
         * alias name.
         *
         * @param alias the alias name
         * @return the canonical charset name - or null if unknown.
         */
        public String getCharsetNameFor(String alias) {
            return charsetMap.get(upper(alias));
        }
    }

    /**
     * Checks the consistency of the given properties, then verifies that
     * Encodings agrees with the mapping they define.
     *
     * @param props the content of the Encodings.properties resource file.
     * @throws Exception if a reflective call on Encodings fails.
     */
    public static void test(Properties props) throws Exception {

        // First, build a mapping from the properties read from the resource
        // file.
        // We're going to check the consistency of the resource file
        // while building this mapping, and throw errors if the file
        // does not meet our assumptions.
        //
        Map<String, Collection<String>> lines = new HashMap<>();
        final CheckCharsetMapping mapping = new CheckCharsetMapping();

        for (String key : props.stringPropertyNames()) {
            Collection<String> values = getValues(props.getProperty(key));
            lines.put(key, values);
            mapping.addMapping(key, values);
        }

        // Then build maps of EncodingInfos, and print along debugging
        // information that should help understand the content of the
        // resource file and the mapping it defines.
        //
        Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names
        Map<String, EncodingInfo> xmlMap = new HashMap<>();    // Map indexed by XML names
        Map<String, String> preferred =
                new HashMap<>(mapping.preferredMime);          // Java Name -> Preferred Mime Name
        for (Entry<String, Collection<String>> e : lines.entrySet()) {
            final String charsetName = mapping.getCharsetNameFor(e.getKey());
            if (charsetName == null) {
                System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue());
                continue;
            }
            final Charset c = Charset.forName(charsetName);
            final String k = upper(e.getKey());
            final String kc = upper(charsetName);
            final String kn = upper(c.name());
            final StringBuilder sb = new StringBuilder();
            for (String xml : e.getValue()) {
                final String kx = upper(xml);
                EncodingInfo info = xmlMap.get(kx);
                if (info == null) {
                    info = new EncodingInfo(xml, charsetName);
                    System.out.println("** XML: "+xml+" -> "+charsetName);
                    xmlMap.put(kx, info);
                }
                // Index the info under the java name of the line, the mapped
                // charset name, and the name returned by the Charset API.
                recordJavaName(k, xml, charsetName, info, javaInfos, preferred, sb);
                recordJavaName(kc, xml, charsetName, info, javaInfos, preferred, sb);
                recordJavaName(kn, xml, charsetName, info, javaInfos, preferred, sb);
            }
            if (sb.length() == 0) {
                System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue());
            } else {
                System.out.print(sb);
            }
        }

        // Now we're going to verify that Encodings.java has done its job
        // correctly. We're going to ask Encodings to convert java names to mime
        // names and mime names to java names - and verify that the returned
        // java names do map to recognized charsets.
        //
        // We're also going to verify that Encodings has recorded the preferred
        // mime name correctly.

        Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class);
        m.setAccessible(true);

        Set<String> xNames = new HashSet<>();
        Set<String> jNames = new HashSet<>();
        for (String name : xmlMap.keySet()) {
            final String javaName = checkConvertMime2Java(name);
            checkPreferredMime(m, javaName, preferred);
            jNames.add(javaName);
            xNames.add(name);
        }

        for (Entry<String, Collection<String>> line : lines.entrySet()) {
            final String javaName = line.getKey();
            if (mapping.getCharsetNameFor(javaName) == null) {
                continue;
            }
            if (!jNames.contains(javaName)) {
                checkPreferredMime(m, javaName, preferred);
                jNames.add(javaName);
            }
            for (String xml : line.getValue()) {
                // xNames was seeded with upper-cased names above, so a name
                // that is not written in upper case in the file is checked
                // here once more, with its original case.
                if (xNames.contains(xml)) {
                    continue;
                }
                final String jName = checkConvertMime2Java(xml);
                xNames.add(xml);
                if (jNames.contains(jName)) {
                    continue;
                }
                checkPreferredMime(m, jName, preferred);
            }
        }
    }

    /**
     * Records info under the given java name key unless the key is already
     * known. Also records xml as the preferred mime name of that key if it has
     * none yet, and appends a trace line to log.
     */
    private static void recordJavaName(String key, String xml, String charsetName,
            EncodingInfo info, Map<String, EncodingInfo> javaInfos,
            Map<String, String> preferred, StringBuilder log) {
        if (javaInfos.containsKey(key)) {
            return;
        }
        javaInfos.put(key, info);
        if (!preferred.containsKey(key)) {
            preferred.put(key, xml);
        }
        log.append("** Java: ").append(key).append(" -> ")
           .append(xml).append(" (charset: ")
           .append(charsetName).append(")\n");
    }

    /**
     * Asks Encodings to convert the given XML (mime) name to a java name and
     * verifies that the result is a charset name recognized by the Charset API.
     *
     * @param xml the XML (mime) name to convert.
     * @return the java name returned by Encodings.
     * @throws Error if the returned name is not a recognized charset.
     */
    private static String checkConvertMime2Java(String xml) {
        final String jName = Encodings.convertMime2JavaEncoding(xml);
        final String jCharsetName;
        try {
            jCharsetName = Charset.forName(jName).name();
        } catch (Exception x) {
            throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x);
        }
        System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")");
        return jName;
    }

    /**
     * Invokes the given static method (Encodings.getMimeEncoding) on javaName
     * and verifies that it returns the expected preferred mime name.
     *
     * @param m the method to invoke.
     * @param javaName the java name to look up.
     * @param preferred expected preferred mime names, indexed by upper cased
     *        java name.
     * @throws Exception if the reflective call fails.
     * @throws Error if the returned mime name is not the expected one.
     */
    private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred)
            throws Exception {
        final String mime = (String) m.invoke(null, javaName);
        final String expected = preferred.get(upper(javaName));
        // Both values may be null: use a null-safe comparison.
        if (Objects.equals(mime, expected)) {
            System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\"");
        } else {
            throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+
                expected+"' but got '"+mime+"'");
        }
    }

    /**
     * Extracts the list of XML names from a property value.
     * When the value contains a space, the part before the first space is
     * split on ',' and anything after that space is ignored. When it contains
     * no space, the whole value is returned as a single name, without being
     * split.
     *
     * @param val the property value.
     * @return the XML names found in val.
     */
    private static Collection<String> getValues(String val) {
        final int pos = val.indexOf(' ');
        if (pos < 0) {
            return Collections.singletonList(val);
        }
        final StringTokenizer st = new StringTokenizer(val.substring(0, pos), ",");
        final List<String> values = new ArrayList<>(st.countTokens());
        while (st.hasMoreTokens()) {
            values.add(st.nextToken());
        }
        return values;
    }

    // can be called in main() to help debugging.
    // Prints out all available charsets and their recognized aliases
    // as returned by the Charset API.
    private static void printAllCharsets() {
        Map<String, Charset> all = Charset.availableCharsets();
        System.out.println("\n=========================================\n");
        for (Entry<String, Charset> e : all.entrySet()) {
            System.out.println(e.getKey() + ": " + e.getValue().aliases());
        }
    }
}