/**
 * Copyright (c) 2003-2009, Xith3D Project Group all rights reserved.
 * 
 * Portions based on the Java3D interface, Copyright by Sun Microsystems.
 * Many thanks to the developers of Java3D and Sun Microsystems for their
 * innovation and design.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * 
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * 
 * Neither the name of the 'Xith3D Project Group' nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE
 */
package org.xith3d.utility.characters;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * Provides methods to get information about printable characters.
 * Any printable character is associated with an index to be used in an array
 * of the size [total number of printable chars].
 * <p>
 * The static <code>isPrintable</code> convenience methods share one cached
 * encoder and one pair of scratch buffers held in static fields. Access to
 * that cache is not synchronized.
 * 
 * @author Kevin Finley (aka horati)
 * @author Marvin Froehlich (aka Qudus)
 */
public abstract class CharIndex
{
    /**
     * For expressing values within the MemoryStrategy:
     * <ul>
     * <li>p is the number of printable characters</li>
     * <li>a is the number of all possible characters</li>
     * </ul>
     */
    public static enum MemoryStrategy
    {
        /**
         * This strategy does no pre-collections of printable chars.<br>
         * getIndex() is executed in O(a).<br>
         * getNumberOfPrintableChars() is executed in O(a) the first time and then in O(1).<br>
         * Memory consumption is O(1).
         */
        SPACE_OPTIMIZED,
        
        /**
         * This strategy uses BitSets to determine the information.<br>
         * getIndex() is executed in O(1) if the character is not found or (cheap) O(a) if the character is found.<br>
         * getNumberOfPrintableChars() is executed in O(1).<br>
         * Memory consumption is O(a/8).
         */
        BITSET_OPTIMIZED,
        
        /**
         * This is a GC optimized detection way.<br>
         * getIndex() is executed in O(1).<br>
         * getNumberOfPrintableChars() is executed in O(1).<br>
         * Memory consumption is O(a*6).
         */
        GC_OPTIMIZED,
        
        /**
         * This strategy uses GC-optimized hashing to retrieve and store
         * the information.<br>
         * getIndex() is executed in O(1).<br>
         * getNumberOfPrintableChars() is executed in O(1).<br>
         * Memory consumption is O(p*25).
         */
        HASH_OPTIMIZED,
        
        /**
         * This strategy uses GC-optimized binary searching to retrieve and store
         * the information.<br>
         * getIndex() is executed in O(log p).<br>
         * getNumberOfPrintableChars() is executed in O(1).<br>
         * Memory consumption is O(p*2).
         */
        BINSEARCH_OPTIMIZED;
    }
    
    private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();
    
    // Cache used by the static isPrintable( char, Charset ) overloads.
    // All four fields are (re)initialized together whenever the charset changes.
    private static Charset charset = null;
    private static CharsetEncoder encoder = null;
    private static CharBuffer charBuffer = null;
    private static ByteBuffer byteBuffer = null;
    
    /**
     * Determines, if the given char is printable, using caller-supplied
     * encoder and scratch buffers.
     * <p>
     * The blank (' ') is always reported as printable without consulting the
     * encoder. Any other char must belong to one of the accepted Unicode
     * general categories and must not be reported as unmappable by the
     * encoder. Other encoder results (e.g. an overflow of the byte buffer)
     * do not mark the char as non-printable.
     * 
     * @param ch the char in question
     * @param encoder the encoder used to check, whether the char is mappable
     * @param charBuffer scratch buffer, that is cleared and filled with the char
     * @param byteBuffer scratch buffer, that is cleared and receives the encoded bytes
     * 
     * @return true, if the char is printable
     */
    protected static boolean isPrintable( char ch, CharsetEncoder encoder, CharBuffer charBuffer, ByteBuffer byteBuffer )
    {
        if ( ch == ' ' )
        {
            return ( true );
        }
        
        boolean result = false;
        
        switch ( Character.getType( ch ) )
        {
            case Character.CONTROL:
            case Character.FORMAT:
            case Character.LINE_SEPARATOR:
            case Character.MODIFIER_LETTER:
            case Character.PARAGRAPH_SEPARATOR:
            case Character.PRIVATE_USE:
            case Character.SPACE_SEPARATOR:
            case Character.SURROGATE:
            case Character.UNASSIGNED:
                // leave it false
                break;
            
            case Character.COMBINING_SPACING_MARK:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.CURRENCY_SYMBOL:
            case Character.DASH_PUNCTUATION:
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.ENCLOSING_MARK:
            case Character.END_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.LETTER_NUMBER:
            case Character.LOWERCASE_LETTER:
            case Character.MATH_SYMBOL:
            case Character.MODIFIER_SYMBOL:
            case Character.NON_SPACING_MARK:
            case Character.OTHER_LETTER:
            case Character.OTHER_NUMBER:
            case Character.OTHER_PUNCTUATION:
            case Character.OTHER_SYMBOL:
            case Character.START_PUNCTUATION:
            case Character.TITLECASE_LETTER:
            case Character.UPPERCASE_LETTER:
                result = true;
                break;
            
            default:
                assert false : "Has the Unicode specification been updated since this code was written?";
        }
        
        if ( result )
        {
            charBuffer.clear();
            byteBuffer.clear();
            
            charBuffer.append( ch );
            // flip() (not rewind()) limits the readable range to the single
            // appended char, even if the supplied buffer has a larger capacity.
            charBuffer.flip();
            
            CoderResult cr = encoder.encode( charBuffer, byteBuffer, true );
            if ( cr.isUnmappable() )
            {
                result = false;
            }
        }
        
        return ( result );
    }
    
    /**
     * Simply determines, if the given char is printable without any optimizations.
     * <p>
     * The encoder for the most recently used Charset is cached in static
     * fields and rebuilt, when a different Charset is requested.
     * 
     * @param ch the char in question
     * @param charset the Charset to use. If null, the most recently used
     *                Charset is taken, or the platform default, if none has
     *                been used yet.
     * 
     * @return true, if the char is printable
     */
    public static boolean isPrintable( char ch, Charset charset )
    {
        Charset effective = charset;
        if ( effective == null )
        {
            // Previously this path dereferenced the (still null) cached charset
            // on the very first call and threw a NullPointerException.
            effective = ( CharIndex.charset != null ) ? CharIndex.charset : DEFAULT_CHARSET;
        }
        
        if ( ( encoder == null ) || !effective.equals( CharIndex.charset ) )
        {
            CharIndex.charset = effective;
            CharIndex.encoder = effective.newEncoder();
            CharIndex.encoder.onUnmappableCharacter( CodingErrorAction.REPORT );
            CharIndex.encoder.onMalformedInput( CodingErrorAction.REPORT );
            CharIndex.charBuffer = CharBuffer.allocate( 1 );
            CharIndex.byteBuffer = ByteBuffer.allocate( 2 );
        }
        
        return ( isPrintable( ch, encoder, charBuffer, byteBuffer ) );
    }
    
    /**
     * Simply determines, if the given char is printable without any optimizations.
     * 
     * @param ch the char in question
     * @param charset the name of the Charset to use (resolved through {@link Charset#forName(String)})
     * 
     * @return true, if the char is printable
     */
    public static boolean isPrintable( char ch, String charset )
    {
        return ( isPrintable( ch, Charset.forName( charset ) ) );
    }
    
    /**
     * Simply determines, if the given char is printable without any optimizations.
     * Uses the most recently used Charset, or the platform default, if none
     * has been used yet.
     * 
     * @param ch the char in question
     * 
     * @return true, if the char is printable
     */
    public static boolean isPrintable( char ch )
    {
        return ( isPrintable( ch, ( CharIndex.charset != null ) ? CharIndex.charset : DEFAULT_CHARSET ) );
    }
    
    /**
     * @return the MemoryStrategy used by this CharIndex
     */
    public abstract MemoryStrategy getMemoryStrategy();
    
    /**
     * @return the Charset used by this CharIndex
     */
    public abstract Charset getCharset();
    
    /**
     * @return the total number of chars, that is cared of.
     */
    public abstract int getTotalNumberOfCharacters();
    
    /**
     * @return the number of printable chars in this CharIndex.
     */
    public abstract int getNumberOfPrintableChars();
    
    /**
     * If you want to allocate a smaller array [size: getNumberOfPrintableChars()],
     * You can use the int returned by this method as the index in this array.
     * 
     * @param ch the char in question
     * 
     * @return the index in a smaller array
     * 
     * @see #getNumberOfPrintableChars()
     */
    public abstract int getIndex( char ch );
    
    /**
     * Does nothing. Instances are obtained through the static
     * <code>create*CharIndex</code> factory methods.
     */
    protected CharIndex()
    {
    }
    
    /**
     * Creates a space-optimized CharIndex.
     * 
     * @param charset the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#SPACE_OPTIMIZED
     */
    public static CharIndex createSpaceOptimizedCharIndex( Charset charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( new SpaceOptimizedCharIndex( charset ) );
    }
    
    /**
     * Creates a space-optimized CharIndex.
     * 
     * @param charset the name of the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#SPACE_OPTIMIZED
     */
    public static CharIndex createSpaceOptimizedCharIndex( String charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( createSpaceOptimizedCharIndex( Charset.forName( charset ) ) );
    }
    
    /**
     * Creates a space-optimized CharIndex for the platform's default Charset.
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#SPACE_OPTIMIZED
     */
    public static CharIndex createSpaceOptimizedCharIndex()
    {
        return ( createSpaceOptimizedCharIndex( Charset.defaultCharset() ) );
    }
    
    /**
     * Creates a BitSet-optimized CharIndex.
     * 
     * @param charset the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BITSET_OPTIMIZED
     */
    public static CharIndex createBitSetOptimizedCharIndex( Charset charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( new BitSetOptimizedCharIndex( charset ) );
    }
    
    /**
     * Creates a BitSet-optimized CharIndex.
     * 
     * @param charset the name of the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BITSET_OPTIMIZED
     */
    public static CharIndex createBitSetOptimizedCharIndex( String charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( createBitSetOptimizedCharIndex( Charset.forName( charset ) ) );
    }
    
    /**
     * Creates a BitSet-optimized CharIndex for the platform's default Charset.
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BITSET_OPTIMIZED
     */
    public static CharIndex createBitSetOptimizedCharIndex()
    {
        return ( createBitSetOptimizedCharIndex( Charset.defaultCharset() ) );
    }
    
    /**
     * Creates a GC-optimized CharIndex.
     * 
     * @param charset the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#GC_OPTIMIZED
     */
    public static CharIndex createGCOptimizedCharIndex( Charset charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( new GCOptimizedCharIndex( charset ) );
    }
    
    /**
     * Creates a GC-optimized CharIndex.
     * 
     * @param charset the name of the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#GC_OPTIMIZED
     */
    public static CharIndex createGCOptimizedCharIndex( String charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( createGCOptimizedCharIndex( Charset.forName( charset ) ) );
    }
    
    /**
     * Creates a GC-optimized CharIndex for the platform's default Charset.
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#GC_OPTIMIZED
     */
    public static CharIndex createGCOptimizedCharIndex()
    {
        return ( createGCOptimizedCharIndex( Charset.defaultCharset() ) );
    }
    
    /**
     * Creates a hash-optimized CharIndex.
     * 
     * @param charset the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#HASH_OPTIMIZED
     */
    public static CharIndex createHashOptimizedCharIndex( Charset charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( new HashOptimizedCharIndex( charset ) );
    }
    
    /**
     * Creates a hash-optimized CharIndex.
     * 
     * @param charset the name of the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#HASH_OPTIMIZED
     */
    public static CharIndex createHashOptimizedCharIndex( String charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( createHashOptimizedCharIndex( Charset.forName( charset ) ) );
    }
    
    /**
     * Creates a hash-optimized CharIndex for the platform's default Charset.
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#HASH_OPTIMIZED
     */
    public static CharIndex createHashOptimizedCharIndex()
    {
        return ( createHashOptimizedCharIndex( Charset.defaultCharset() ) );
    }
    
    /**
     * Creates a search-optimized CharIndex.
     * 
     * @param charset the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BINSEARCH_OPTIMIZED
     */
    public static CharIndex createBinSearchOptimizedCharIndex( Charset charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( new BinarySearchCharIndex( charset ) );
    }
    
    /**
     * Creates a search-optimized CharIndex.
     * 
     * @param charset the name of the Charset to use (must not be null; checked by assertion only)
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BINSEARCH_OPTIMIZED
     */
    public static CharIndex createBinSearchOptimizedCharIndex( String charset )
    {
        assert ( charset != null ) : "Charset must not be null.";
        
        return ( createBinSearchOptimizedCharIndex( Charset.forName( charset ) ) );
    }
    
    /**
     * Creates a search-optimized CharIndex for the platform's default Charset.
     * 
     * @return the new CharIndex
     * 
     * @see MemoryStrategy#BINSEARCH_OPTIMIZED
     */
    public static CharIndex createBinSearchOptimizedCharIndex()
    {
        return ( createBinSearchOptimizedCharIndex( Charset.defaultCharset() ) );
    }
    
    /*
    public static void main( String[] args )
    {
        final long t0 = System.currentTimeMillis();
        
        //CharIndex ci = CharIndex.createSpaceOptimizedCharIndex( "ISO-8859-1" );
        //CharIndex ci = CharIndex.createBitSetOptimizedCharIndex( "ISO-8859-1" );
        //CharIndex ci = CharIndex.createGCOptimizedCharIndex( "ISO-8859-1" );
        CharIndex ci = CharIndex.createHashOptimizedCharIndex( "ISO-8859-1" );
        //CharIndex ci = CharIndex.createBinSearchOptimizedCharIndex( "ISO-8859-1" );
        
        final long t1 = System.currentTimeMillis();
        
        for ( int i = 0; i < 10000; i++ )
        {
            ci.getIndex( 'Z' );
        }
        
        final long t2 = System.currentTimeMillis();
        
        System.out.printf( "Construction %d ms\tExecution %d ms\tPer operation %f ms\n", t1 - t0, t2 - t1, ( t2 - t1 ) / 10000. );
        System.out.println( ci.getIndex( 'A' ) );
        System.out.println( ci.getNumberOfPrintableChars() );
    }
    */
}