// UCPFilter.java example
//
// Explorer
// marketcetera-master
package org.marketcetera.util.misc;

import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;

/**
 * A filter for Unicode code points. It also maintains a cache of
 * filters associated with {@link Charset} instances.
 *
 * <p>For charset-based filters, this class may perform slowly when
 * the cache is built, if the JVM is running with an active debugging
 * agent. This is because the JRE implements the acceptability test by
 * throwing and catching an exception, which is trapped by the agent;
 * if the charset can only encode a small subset of the Unicode code
 * points, then a lot of exceptions are thrown and caught, resulting
 * in a performance degradation as the agent intercepts repeatedly
 * (even if the debugger does not indicate an interest in
 * exceptions).</p>
 * 
 * @author tlerios@marketcetera.com
 * @since 0.6.0
 * @version $Id: UCPFilter.java 16154 2012-07-14 16:34:05Z colin $
 */

/* $License$ */

@ClassVersion("$Id: UCPFilter.java 16154 2012-07-14 16:34:05Z colin $")
public abstract class UCPFilter
{

    // CLASS DATA.

    /**
     * A filter for Unicode characters that can be encoded by a
     * specific charset.
     *
     * <p>Instances of this filter are cached by {@link
     * UCPFilter#forCharset(Charset)} and thus may be shared by any
     * number of callers. A {@link CharsetEncoder} is not safe for
     * concurrent use, so every query against the encoder is
     * serialized on the encoder itself.</p>
     */

    @ClassVersion("$Id: UCPFilter.java 16154 2012-07-14 16:34:05Z colin $")
    private static final class UCPCharsetFilter
        extends UCPFilter
    {

        // INSTANCE DATA.

        // Assigned once in the constructor; also serves as the lock
        // guarding all encoder queries.
        private final CharsetEncoder mEncoder;


        // CONSTRUCTORS.

        /**
         * Creates a filter for the given charset, backed by a new
         * encoder obtained from that charset.
         *
         * @param cs The charset.
         */

        public UCPCharsetFilter
            (Charset cs)
        {
            mEncoder=cs.newEncoder();
        }


        // UCPFilter.

        @Override
        public boolean isAcceptable(int ucp)
        {
            // Build the string outside the lock; only the encoder
            // query itself touches shared mutable state.
            String text=StringUtils.fromUCP(ucp);
            synchronized (mEncoder) {
                return mEncoder.canEncode(text);
            }
        }
    }

    /**
     * A filter for Unicode characters deemed valid by {@link
     * StringUtils#isValid(int)}.
     */

    public static final UCPFilter VALID=new UCPFilter()
        {
            @Override
            public boolean isAcceptable(int ucp)
            {
                return StringUtils.isValid(ucp);
            }
        };

    /**
     * A filter for Unicode characters that can be represented by a
     * single char, that is, code points in the inclusive range
     * 0x0000 through 0xFFFF.
     */

    public static final UCPFilter CHAR=new UCPFilter()
        {
            @Override
            public boolean isAcceptable(int ucp)
            {
                return ((0<=ucp) && (ucp<=0xFFFF));
            }
        };

    /**
     * A filter for Unicode code points that are digits, as
     * determined by {@link Character#isDigit(int)}.
     */

    public static final UCPFilter DIGIT=new UCPFilter()
        {
            @Override
            public boolean isAcceptable(int ucp)
            {
                return Character.isDigit(ucp);
            }
        };

    /**
     * A filter for Unicode code points that are letters, as
     * determined by {@link Character#isLetter(int)}.
     */

    public static final UCPFilter LETTER=new UCPFilter()
        {
            @Override
            public boolean isAcceptable(int ucp)
            {
                return Character.isLetter(ucp);
            }
        };

    /**
     * A filter for Unicode code points that are letters or digits,
     * as determined by {@link Character#isLetterOrDigit(int)}.
     */

    public static final UCPFilter ALNUM=new UCPFilter()
        {
            @Override
            public boolean isAcceptable(int ucp)
            {
                return Character.isLetterOrDigit(ucp);
            }
        };

    // Cache of charset-based filters, keyed by charset. All access
    // takes place while holding the map's own monitor.
    private static final HashMap<Charset,UCPFilter> mMap=
        new HashMap<Charset,UCPFilter>();


    // CLASS METHODS.

    /**
     * Returns a filter for Unicode code points that can be encoded by
     * the given charset. The filter is created upon the first request
     * for a charset, and the same instance is returned by all
     * subsequent requests for that charset.
     *
     * @param cs The charset.
     *
     * @return The filter.
     */

    public static UCPFilter forCharset
        (Charset cs)
    {
        synchronized (mMap) {
            UCPFilter filter=mMap.get(cs);
            if (filter==null) {
                filter=new UCPCharsetFilter(cs);
                mMap.put(cs,filter);
            }
            return filter;
        }
    }

    /**
     * Returns a filter for Unicode code points that can be encoded by
     * the default JVM charset.
     *
     * @return The filter.
     */

    public static final UCPFilter getDefaultCharset()
    {
        return UCPFilter.forCharset(Charset.defaultCharset());
    }

    /**
     * Returns a filter for Unicode code points that can be encoded by
     * the current system file encoding/charset (as specified in the
     * system property <code>file.encoding</code>). If that property
     * is not set, the filter for the default JVM charset is returned
     * instead. If the property is set to a name that {@link
     * Charset#forName(String)} rejects, the exception raised by that
     * method is propagated to the caller.
     *
     * @return The filter.
     */

    public static final UCPFilter getFileSystemCharset()
    {
        String name=System.getProperty("file.encoding"); //$NON-NLS-1$
        if (name==null) {
            // Charset.forName() does not accept a null name; the
            // default JVM charset is the closest meaningful answer.
            return getDefaultCharset();
        }
        return UCPFilter.forCharset(Charset.forName(name));
    }


    // INSTANCE METHODS.

    /**
     * Checks whether the given Unicode code point is acceptable to
     * the receiver.
     *
     * @param ucp The code point.
     *
     * @return True if so.
     */

    public abstract boolean isAcceptable(int ucp);
}