Charset.java example

Explorer
android_libcore-master
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package java.nio.charset;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.spi.CharsetProvider;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Vector;

// BEGIN android-changed
import com.ibm.icu4jni.charset.CharsetProviderICU;
// END android-changed

/**
 * A charset defines a mapping between a Unicode character sequence and a byte
 * sequence. It facilitates the encoding from a Unicode character sequence into
 * a byte sequence, and the decoding from a byte sequence into a Unicode
 * character sequence.
 * <p>
 * A charset has a canonical name, which is usually in uppercase. Typically it
 * also has one or more aliases. The name string can only consist of the
 * following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':'. '-' and '_'.
 * The first character of the name must be a digit or a letter.
 * <p>
 * The following charsets should be supported by any java platform: US-ASCII,
 * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
 * <p>
 * Additional charsets can be made available by configuring one or more charset
 * providers through provider configuration files. Such files are always named
 * as "java.nio.charset.spi.CharsetProvider" and located in the
 * "META-INF/services" sub folder of one or more classpaths. The files should be
 * encoded in "UTF-8". Each line of their content specifies the class name of a
 * charset provider which extends
 * <code>java.nio.charset.spi.CharsetProvider</code>. A line should end with
 * '\r', '\n' or '\r\n'. Leading and trailing whitespaces are trimmed. Blank
 * lines, and lines (after trimming) starting with "#" which are regarded as
 * comments, are both ignored. Duplicates of names already found are also
 * ignored. Both the configuration files and the provider classes will be loaded
 * using the thread context class loader.
 * <p>
 * This class is thread-safe.
 *
 * @see java.nio.charset.spi.CharsetProvider
 */
public abstract class Charset implements Comparable<Charset> {

    /*
     * The name of configuration files where charset provider class names can be
     * specified.
     */
    private static final String PROVIDER_CONFIGURATION_FILE_NAME = "META-INF/services/java.nio.charset.spi.CharsetProvider"; //$NON-NLS-1$

    /*
     * The encoding of configuration files
     */
    private static final String PROVIDER_CONFIGURATION_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$

    /*
     * The comment string used in configuration files
     */
    private static final String PROVIDER_CONFIGURATION_FILE_COMMENT = "#"; //$NON-NLS-1$

    private static ClassLoader systemClassLoader;

    // built in provider instance, assuming thread-safe
    // BEGIN android-changed
    private static final CharsetProviderICU _builtInProvider;
    // END android-changed

    // cached built in charsets
    private static TreeMap<String, Charset> _builtInCharsets = null;

    private final String canonicalName;

    // the aliases set
    private final HashSet<String> aliasesSet;

    // cached Charset table
    private final static HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>();

    private static boolean inForNameInternal = false;

    static {
        /*
         * Create built-in charset provider even if no privilege to access
         * charset provider.
         */
        _builtInProvider = AccessController
                .doPrivileged(new PrivilegedAction<CharsetProviderICU>() {
                    public CharsetProviderICU run() {
                         return new CharsetProviderICU();
                   }
                });
    }

    /**
     * Constructs a <code>Charset</code> object. Duplicated aliases are
     * ignored.
     * 
     * @param canonicalName
     *            the canonical name of the charset.
     * @param aliases
     *            an array containing all aliases of the charset. May be null.
     * @throws IllegalCharsetNameException
     *             on an illegal value being supplied for either
     *             <code>canonicalName</code> or for any element of
     *             <code>aliases</code>.
     */
    protected Charset(String canonicalName, String[] aliases) {
        if (null == canonicalName) {
            throw new NullPointerException();
        }
        // check whether the given canonical name is legal
        checkCharsetName(canonicalName);
        this.canonicalName = canonicalName;
        // check each alias and put into a set
        this.aliasesSet = new HashSet<String>();
        if (null != aliases) {
            for (int i = 0; i < aliases.length; i++) {
                checkCharsetName(aliases[i]);
                this.aliasesSet.add(aliases[i]);
            }
        }
    }

    /*
     * Checks whether a character is a special character that can be used in
     * charset names, other than letters and digits.
     */
    private static boolean isSpecial(char c) {
        return ('-' == c || '.' == c || ':' == c || '_' == c);
    }

    /*
     * Checks whether a character is a letter (ascii) which are defined in the
     * spec.
     */
    private static boolean isLetter(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }

    /*
     * Checks whether a character is a digit (ascii) which are defined in the
     * spec.
     */
    private static boolean isDigit(char c) {
        return ('0' <= c && c <= '9');
    }

    /*
     * Checks whether a given string is a legal charset name. The argument name
     * should not be null.
     */
    private static void checkCharsetName(String name) {
        // An empty string is illegal charset name
        if (name.length() == 0) {
            throw new IllegalCharsetNameException(name);
        }
        // The first character must be a letter or a digit
        // This is related to HARMONY-68 (won't fix)
        // char first = name.charAt(0);
        // if (!isLetter(first) && !isDigit(first)) {
        // throw new IllegalCharsetNameException(name);
        // }
        // Check the remaining characters
        int length = name.length();
        for (int i = 0; i < length; i++) {
            char c = name.charAt(i);
            if (!isLetter(c) && !isDigit(c) && !isSpecial(c)) {
                throw new IllegalCharsetNameException(name);
            }
        }
    }

    /*
     * Use privileged code to get the context class loader.
     */
    private static ClassLoader getContextClassLoader() {
        final Thread t = Thread.currentThread();
        return AccessController
                .doPrivileged(new PrivilegedAction<ClassLoader>() {
                    public ClassLoader run() {
                        return t.getContextClassLoader();
                    }
                });
    }

    /*
     * Use privileged code to get the system class loader.
     */
    private static void getSystemClassLoader() {
        if (null == systemClassLoader) {
            systemClassLoader = AccessController
                    .doPrivileged(new PrivilegedAction<ClassLoader>() {
                        public ClassLoader run() {
                            return ClassLoader.getSystemClassLoader();
                        }
                    });
        }
    }

    /*
     * Add the charsets supported by the given provider to the map.
     */
    private static void addCharsets(CharsetProvider cp,
            TreeMap<String, Charset> charsets) {
        Iterator<Charset> it = cp.charsets();
        while (it.hasNext()) {
            Charset cs = it.next();
            // Only new charsets will be added
            if (!charsets.containsKey(cs.name())) {
                charsets.put(cs.name(), cs);
            }
        }
    }

    /*
     * Trim comment string, and then trim white spaces.
     */
    private static String trimClassName(String name) {
        String trimedName = name;
        int index = name.indexOf(PROVIDER_CONFIGURATION_FILE_COMMENT);
        // Trim comments
        if (index != -1) {
            trimedName = name.substring(0, index);
        }
        return trimedName.trim();
    }

    /*
     * Read a configuration file and add the charsets supported by the providers
     * specified by this configuration file to the map.
     */
    private static void loadConfiguredCharsets(URL configFile,
            ClassLoader contextClassLoader, TreeMap<String, Charset> charsets) {
        BufferedReader reader = null;
        try {
            InputStream is = configFile.openStream();
            // Read each line for charset provider class names
            // BEGIN android-modified
            reader = new BufferedReader(new InputStreamReader(is,
                            PROVIDER_CONFIGURATION_FILE_ENCODING), 8192);
            // END android-modified
            String providerClassName = reader.readLine();
            while (null != providerClassName) {
                providerClassName = trimClassName(providerClassName);
                // Skip comments and blank lines
                if (providerClassName.length() > 0) { // Non empty string
                    // Load the charset provider
                    Object cp = null;
                    try {
                        Class<?> c = Class.forName(providerClassName, true,
                                contextClassLoader);
                        cp = c.newInstance();
                    } catch (Exception ex) {
                        // try to use system classloader when context
                        // classloader failed to load config file.
                        try {
                            getSystemClassLoader();
                            Class<?> c = Class.forName(providerClassName, true,
                                    systemClassLoader);
                            cp = c.newInstance();
                        } catch (Exception e) {
                            throw new Error(e.getMessage(), e);
                        }
                    }
                    // Put the charsets supported by this provider into the map
                    addCharsets((CharsetProvider) cp, charsets);
                }
                // Read the next line of the config file
                providerClassName = reader.readLine();
            }
        } catch (IOException ex) {
            // Can't read this configuration file, ignore
        } finally {
            try {
                if (null != reader) {
                    reader.close();
                }
            } catch (IOException ex) {
                // Ignore closing exception
            }
        }
    }

    /**
     * Gets a map of all available charsets supported by the runtime.
     * <p>
     * The returned map contains mappings from canonical names to corresponding
     * instances of <code>Charset</code>. The canonical names can be considered
     * as case-insensitive.
     *
     * @return an unmodifiable map of all available charsets supported by the
     *         runtime
     */
    @SuppressWarnings("unchecked")
    public static SortedMap<String, Charset> availableCharsets() {
        // BEGIN android-removed
        // // workaround: conflicted Charsets with icu4j 4.0
        // Charset.forName("TIS-620");
        // Charset.forName("windows-1258");
        // Charset.forName("cp856");
        // Charset.forName("cp922");
        // END android-removed

        // Initialize the built-in charsets map cache if necessary
        if (null == _builtInCharsets) {
            synchronized (Charset.class) {
                if (null == _builtInCharsets) {
                    _builtInCharsets = new TreeMap<String, Charset>(
                            IgnoreCaseComparator.getInstance());
                    _builtInProvider.putCharsets(_builtInCharsets);
                }
            }
        }

        // Add built-in charsets
        TreeMap<String, Charset> charsets = (TreeMap<String, Charset>) _builtInCharsets
                .clone();

        // Collect all charsets provided by charset providers
        ClassLoader contextClassLoader = getContextClassLoader();
        Enumeration<URL> e = null;
        try {
            if (null != contextClassLoader) {
                e = contextClassLoader
                        .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
            } else {
                getSystemClassLoader();
                e = systemClassLoader
                        .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
            }
            // Examine each configuration file
            while (e.hasMoreElements()) {
                loadConfiguredCharsets(e.nextElement(), contextClassLoader,
                        charsets);
            }
        } catch (IOException ex) {
            // Unexpected ClassLoader exception, ignore
        }
        return Collections.unmodifiableSortedMap(charsets);
    }

    /*
     * Read a configuration file and try to find the desired charset among those
     * which are supported by the providers specified in this configuration
     * file.
     */
    private static Charset searchConfiguredCharsets(String charsetName,
            ClassLoader contextClassLoader, URL configFile) {
        BufferedReader reader = null;
        try {
            InputStream is = configFile.openStream();
            // Read each line for charset provider class names
            // BEGIN android-modified
            reader = new BufferedReader(new InputStreamReader(is,
                            PROVIDER_CONFIGURATION_FILE_ENCODING), 8192);
            // END android-modified
            String providerClassName = reader.readLine();
            while (null != providerClassName) {
                providerClassName = trimClassName(providerClassName);
                if (providerClassName.length() > 0) { // Non empty string
                    // Load the charset provider
                    Object cp = null;
                    try {
                        Class<?> c = Class.forName(providerClassName, true,
                                contextClassLoader);
                        cp = c.newInstance();
                    } catch (Exception ex) {
                        // try to use system classloader when context
                        // classloader failed to load config file.
                        try {
                            getSystemClassLoader();
                            Class<?> c = Class.forName(providerClassName, true,
                                    systemClassLoader);
                            cp = c.newInstance();
                        } catch (SecurityException e) {
                            // BEGIN android-changed
                            // ignore
                            // END android-changed
                        } catch (Exception e) {
                            throw new Error(e.getMessage(), e);
                        }
                    }
                    // BEGIN android-changed
                    if (cp != null) {
                        // Try to get the desired charset from this provider
                        Charset cs = ((CharsetProvider) cp)
                                .charsetForName(charsetName);
                        if (null != cs) {
                            return cs;
                        }
                    }
                    // END android-changed
                }
                // Read the next line of the config file
                providerClassName = reader.readLine();
            }
            return null;
        } catch (IOException ex) {
            // Can't read this configuration file
            return null;
        } finally {
            try {
                if (null != reader) {
                    reader.close();
                }
            } catch (IOException ex) {
                // Ignore closing exception
            }
        }
    }

    /*
     * Gets a <code>Charset</code> instance for the specified charset name. If
     * the charset is not supported, returns null instead of throwing an
     * exception.
     */
    private synchronized static Charset forNameInternal(String charsetName)
            throws IllegalCharsetNameException {
        Charset cs = cachedCharsetTable.get(charsetName);
        if (null != cs) {
            return cs;
        }

        if (null == charsetName) {
            throw new IllegalArgumentException();
        }
        checkCharsetName(charsetName);
        // try built-in charsets
        // BEGIN android-removed
        // if (_builtInProvider == null) {
        //     _builtInProvider = new CharsetProviderICU();
        // }
        // END android-removed
        cs = _builtInProvider.charsetForName(charsetName);
        if (null != cs) {
            cacheCharset(cs);
            return cs;
        }

        // collect all charsets provided by charset providers
        ClassLoader contextClassLoader = getContextClassLoader();
        Enumeration<URL> e = null;
        try {
            if (null != contextClassLoader) {
                e = contextClassLoader
                        .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
            } else {
                getSystemClassLoader();
                if (systemClassLoader == null) {
                    // Non available during class library start-up phase
                    e = new Vector<URL>().elements();
                } else {
                    e = systemClassLoader
                            .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
                }
            }

            // examine each configuration file
            while (e.hasMoreElements()) {
			     inForNameInternal = true;
			     cs = searchConfiguredCharsets(charsetName, contextClassLoader,
                        e.nextElement());
				 inForNameInternal = false;
                if (null != cs) {
                    cacheCharset(cs);
                    return cs;
                }
            }
        } catch (IOException ex) {
            // Unexpected ClassLoader exception, ignore
        } finally {
		    inForNameInternal = false;
        }
        return null;
    }

    /*
     * save charset into cachedCharsetTable
     */
    private static void cacheCharset(Charset cs) {
        if (!cachedCharsetTable.containsKey(cs.name())){
            cachedCharsetTable.put(cs.name(), cs);  
        }
        Set<String> aliasesSet = cs.aliases();
        if (null != aliasesSet) {
            Iterator<String> iter = aliasesSet.iterator();
            while (iter.hasNext()) {
                String alias = iter.next();
                if (!cachedCharsetTable.containsKey(alias)) {
                    cachedCharsetTable.put(alias, cs); 
                }
            }
        }
    }

    /**
     * Gets a <code>Charset</code> instance for the specified charset name.
     * 
     * @param charsetName
     *            the canonical name of the charset or an alias.
     * @return a <code>Charset</code> instance for the specified charset name.
     * @throws IllegalCharsetNameException
     *             if the specified charset name is illegal.
     * @throws UnsupportedCharsetException
     *             if the desired charset is not supported by this runtime.
     */
    public static Charset forName(String charsetName) {
        Charset c = forNameInternal(charsetName);
        if (null == c) {
            throw new UnsupportedCharsetException(charsetName);
        }
        return c;
    }

    /**
     * Determines whether the specified charset is supported by this runtime.
     * 
     * @param charsetName
     *            the name of the charset.
     * @return true if the specified charset is supported, otherwise false.
     * @throws IllegalCharsetNameException
     *             if the specified charset name is illegal.
     */
    public static synchronized boolean isSupported(String charsetName) {
        if (inForNameInternal) {
            Charset cs = cachedCharsetTable.get(charsetName);
            if (null != cs) {
                return true;
            }

            if (null == charsetName) {
                throw new IllegalArgumentException();
            }
            checkCharsetName(charsetName);

            // Try built-in charsets
            // BEGIN android-removed
            // if (_builtInProvider == null) {
            //     _builtInProvider = new CharsetProviderICU();
            // }
            // END android-removed
            cs = _builtInProvider.charsetForName(charsetName);
            if (null != cs) {
                cacheCharset(cs);
                return true;
            }
            return false;
        } else {
            Charset cs = forNameInternal(charsetName);
            return (null != cs);
        }

    }

    /**
     * Determines whether this charset is a super set of the given charset.
     * 
     * @param charset
     *            a given charset.
     * @return true if this charset is a super set of the given charset,
     *         false if it's unknown or this charset is not a superset of
     *         the given charset.
     */
    public abstract boolean contains(Charset charset);

    /**
     * Gets a new instance of an encoder for this charset.
     * 
     * @return a new instance of an encoder for this charset.
     */
    public abstract CharsetEncoder newEncoder();

    /**
     * Gets a new instance of a decoder for this charset.
     * 
     * @return a new instance of a decoder for this charset.
     */
    public abstract CharsetDecoder newDecoder();

    /**
     * Gets the canonical name of this charset.
     * 
     * @return this charset's name in canonical form.
     */
    public final String name() {
        return this.canonicalName;
    }

    /**
     * Gets the set of this charset's aliases.
     * 
     * @return an unmodifiable set of this charset's aliases.
     */
    public final Set<String> aliases() {
        return Collections.unmodifiableSet(this.aliasesSet);
    }

    /**
     * Gets the name of this charset for the default locale.
     * 
     * <p>The default implementation returns the canonical name of this charset.
     * Subclasses may return a localized display name.
     *
     * @return the name of this charset for the default locale.
     */
    public String displayName() {
        return this.canonicalName;
    }

    /**
     * Gets the name of this charset for the specified locale.
     *
     * <p>The default implementation returns the canonical name of this charset.
     * Subclasses may return a localized display name.
     *
     * @param l
     *            a certain locale
     * @return the name of this charset for the specified locale
     */
    public String displayName(Locale l) {
        return this.canonicalName;
    }

    /**
     * Indicates whether this charset is known to be registered in the IANA
     * Charset Registry.
     * 
     * @return true if the charset is known to be registered, otherwise returns
     *         false.
     */
    public final boolean isRegistered() {
        return !canonicalName.startsWith("x-") //$NON-NLS-1$
                && !canonicalName.startsWith("X-"); //$NON-NLS-1$
    }

    /**
     * Returns true if this charset supports encoding, false otherwise.
     * 
     * @return true if this charset supports encoding, false otherwise.
     */
    public boolean canEncode() {
        return true;
    }

    /**
     * Encodes the content of the give character buffer and outputs to a byte
     * buffer that is to be returned.
     * <p>
     * The default action in case of encoding errors is
     * <code>CodingErrorAction.REPLACE</code>.
     *
     * @param buffer
     *            the character buffer containing the content to be encoded.
     * @return the result of the encoding.
     */
    public final ByteBuffer encode(CharBuffer buffer) {
        try {
            return this.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE).encode(
                            buffer);

        } catch (CharacterCodingException ex) {
            throw new Error(ex.getMessage(), ex);
        }
    }

    /**
     * Encodes a string and outputs to a byte buffer that is to be returned.
     * <p>
     * The default action in case of encoding errors is
     * <code>CodingErrorAction.REPLACE</code>.
     *
     * @param s
     *            the string to be encoded.
     * @return the result of the encoding.
     */
    public final ByteBuffer encode(String s) {
        return encode(CharBuffer.wrap(s));
    }

    /**
     * Decodes the content of the specified byte buffer and writes it to a
     * character buffer that is to be returned.
     * <p>
     * The default action in case of decoding errors is
     * <code>CodingErrorAction.REPLACE</code>.
     * 
     * @param buffer
     *            the byte buffer containing the content to be decoded.
     * @return a character buffer containing the output of the decoding.
     */
    public final CharBuffer decode(ByteBuffer buffer) {

        try {
            return this.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE).decode(
                            buffer);

        } catch (CharacterCodingException ex) {
            throw new Error(ex.getMessage(), ex);
        }
    }

    /*
     * -------------------------------------------------------------------
     * Methods implementing parent interface Comparable
     * -------------------------------------------------------------------
     */

    /**
     * Compares this charset with the given charset. This comparation is
     * based on the case insensitive canonical names of the charsets.
     * 
     * @param charset
     *            the given object to be compared with.
     * @return a negative integer if less than the given object, a positive
     *         integer if larger than it, or 0 if equal to it.
     */
    public final int compareTo(Charset charset) {
        return this.canonicalName.compareToIgnoreCase(charset.canonicalName);
    }

    /*
     * -------------------------------------------------------------------
     * Methods overriding parent class Object
     * -------------------------------------------------------------------
     */

    /**
     * Determines whether this charset equals to the given object. They are
     * considered to be equal if they have the same canonical name.
     * 
     * @param obj
     *            the given object to be compared with.
     * @return true if they have the same canonical name, otherwise false.
     */
    @Override
    public final boolean equals(Object obj) {
        if (obj instanceof Charset) {
            Charset that = (Charset) obj;
            return this.canonicalName.equals(that.canonicalName);
        }
        return false;
    }

    /**
     * Gets the hash code of this charset.
     * 
     * @return the hash code of this charset.
     */
    @Override
    public final int hashCode() {
        return this.canonicalName.hashCode();
    }

    /**
     * Gets a string representation of this charset. Usually this contains the
     * canonical name of the charset.
     * 
     * @return a string representation of this charset.
     */
    @Override
    public final String toString() {
        return "Charset[" + this.canonicalName + "]"; //$NON-NLS-1$//$NON-NLS-2$
    }

    /**
     * Gets the system default charset from the virtual machine.
     * 
     * @return the default charset.
     */
    public static Charset defaultCharset() {
        Charset defaultCharset = null;
        String encoding = AccessController
                .doPrivileged(new PrivilegedAction<String>() {
                    public String run() {
                        return System.getProperty("file.encoding"); //$NON-NLS-1$
                    }
                });
        try {
            defaultCharset = Charset.forName(encoding);
        } catch (UnsupportedCharsetException e) {
            defaultCharset = Charset.forName("UTF-8"); //$NON-NLS-1$
        }
        return defaultCharset;
    }

    /**
     * A comparator that ignores case.
     */
    static class IgnoreCaseComparator implements Comparator<String> {

        // the singleton
        private static Comparator<String> c = new IgnoreCaseComparator();

        /*
         * Default constructor.
         */
        private IgnoreCaseComparator() {
            // no action
        }

        /*
         * Gets a single instance.
         */
        public static Comparator<String> getInstance() {
            return c;
        }

        /*
         * Compares two strings ignoring case.
         */
        public int compare(String s1, String s2) {
            return s1.compareToIgnoreCase(s2);
        }
    }
}