/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.parser.unicode; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.nio.charset.Charset; import com.google.common.base.Preconditions; /** * Reads the binary file generated by {@link org.novelang.build.unicode.UnicodeNamesGenerator}. * * @author Laurent Caillette */ /*package*/ class UnicodeNamesBinaryReader { private static final Charset CHARSET = Charset.forName( "UTF-8" ) ; private final URL resourceUrl ; public UnicodeNamesBinaryReader( final URL resourceUrl ) { this.resourceUrl = Preconditions.checkNotNull( resourceUrl ) ; } public String getName( final char character ) throws IOException, CharacterOutOfBoundsException { final InputStream inputStream = resourceUrl.openStream() ; final String name ; try { name = readName( inputStream, character ) ; } finally { inputStream.close() ; } return name; } /*package*/ static String readName( final InputStream inputStream, final char character ) throws IOException, CharacterOutOfBoundsException { final byte[] characterCountAsBytes = new byte[ 4 ] ; if( inputStream.read( characterCountAsBytes ) != 4 ) { throw new IOException( "Could not read character count, missing bytes" ) ; } final long characterCount = asLong( characterCountAsBytes ) ; if( characterCount > 256 * 256 ) { throw new IOException( "Incorrect character count, may not exceed 65536" ) ; } if( character >= characterCount ) { throw new CharacterOutOfBoundsException( ( int ) characterCount, character ) ; } final int firstOffset = character * 4 ; skipForSure( inputStream, firstOffset ) ; final byte[] nameOffsetAsBytes = new byte[ 4 ] ; if( inputStream.read( nameOffsetAsBytes ) != 4 ) { throw new IOException( "Could not read offset, missing bytes" ) ; } final long nameOffsetFromStart = asLong( nameOffsetAsBytes ); if( nameOffsetFromStart == 0 ) { return null ; } final long relativeNameOffset = nameOffsetFromStart - firstOffset - 8 ; skipForSure( inputStream, relativeNameOffset ) ; final ByteArrayOutputStream nameBytes = new ByteArrayOutputStream() ; while( true ) { final int read = inputStream.read() ; if( read == 0 ) { break ; } else { nameBytes.write( read ) ; } } return nameBytes.toString( CHARSET.name() ) ; } /*package*/ static long asLong( final byte[] nameOffsetAsBytes ) { Preconditions.checkArgument( nameOffsetAsBytes.length == 4 ) ; long nameOffsetFromStart = 0 ; for( int i = 0 ; i < 4 ; i ++ ) { nameOffsetFromStart <<= 8 ; // Does nothing at the first iteration. // Get rid of the sign of the byte by and'ing it. final long unsignedByteAsLong = nameOffsetAsBytes[ i ] & 0x000000FF ; nameOffsetFromStart |= unsignedByteAsLong; } return nameOffsetFromStart ; } private static void skipForSure( final InputStream inputStream, final long relativeOffset ) throws IOException { long skipped = 0 ; while( skipped < relativeOffset ) { skipped += inputStream.skip( relativeOffset - skipped ) ; } if( skipped != relativeOffset ) { throw new IOException( "Attempted to skip " + relativeOffset + " but skipped " + skipped ) ; } } }