package org.marketcetera.util.unicode; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PushbackInputStream; import java.io.Reader; import java.nio.CharBuffer; import java.util.Arrays; import org.marketcetera.util.misc.ClassVersion; /** * A variation of {@link InputStreamReader} that is BOM-aware. It can * operate in any of the following modes: * * <ul> * * <li>As a standard input stream reader that uses the default JVM * charset.</li> * * <li>A reader that uses a specific charset and assumes a specific * signature is present in the input stream (and skips it without * confirming that it's actually present and valid).</li> * * <li>A reader that looks for a signature match among several * candidates, and thus automatically determines the charset.</li> * * </ul> * * @author tlerios@marketcetera.com * @since 0.6.0 * @version $Id: UnicodeInputStreamReader.java 16154 2012-07-14 16:34:05Z colin $ */ /* $License$ */ @ClassVersion("$Id: UnicodeInputStreamReader.java 16154 2012-07-14 16:34:05Z colin $") public class UnicodeInputStreamReader extends Reader { // INSTANCE DATA. private PushbackInputStream mStream; private InputStreamReader mReader; private DecodingStrategy mDecodingStrategy; private SignatureCharset mRequestedSignatureCharset; private SignatureCharset mSignatureCharset; // CONSTRUCTORS. /** * Creates a new reader over the given stream that uses the * default JVM charset. * * @param stream The stream. */ public UnicodeInputStreamReader (InputStream stream) { super(stream); mStream=new PushbackInputStream(stream,Signature.getLongestLength()); } /** * Creates a new reader over the given stream that normally * assumes the given signature is present and its associated * charset should be used. However, if the charset in the given * signature/charset pair is not supported by the JVM, the default * JVM charset is used instead. * * @param stream The stream. * @param requestedSignatureCharset The signature/charset. It may * be null to use the default JVM charset. */ public UnicodeInputStreamReader (InputStream stream, SignatureCharset requestedSignatureCharset) { this(stream); mRequestedSignatureCharset=requestedSignatureCharset; } /** * Creates a new reader over the given stream that normally uses * the charset associated with a matching signature among those * of the given decoding strategy. However, if no signature * matches, or if the charset of the matching signature is not * supported by the JVM, the default JVM charset is used instead. * * @param stream The stream. * @param decodingStrategy The decoding strategy. It may be null * to use the default JVM charset. */ public UnicodeInputStreamReader (InputStream stream, DecodingStrategy decodingStrategy) { this(stream); mDecodingStrategy=decodingStrategy; } // Reader. @Override public int read (CharBuffer target) throws IOException { synchronized (lock) { init(); return mReader.read(target); } } @Override public int read() throws IOException { synchronized (lock) { init(); return mReader.read(); } } @Override public int read (char[] cbuf) throws IOException { synchronized (lock) { init(); return mReader.read(cbuf); } } @Override public int read (char[] cbuf, int off, int len) throws IOException { synchronized (lock) { init(); return mReader.read(cbuf,off,len); } } @Override public long skip (long n) throws IOException { synchronized (lock) { init(); return mReader.skip(n); } } @Override public boolean ready() throws IOException { synchronized (lock) { if (mStream==null) { throw new IOException(Messages.STREAM_CLOSED.getText()); } if (mReader==null) { return false; } return mReader.ready(); } } @Override public boolean markSupported() { try { init(); } catch (IOException ex) { Messages.STREAM_ACCESS_ERROR.error(this,ex); return false; } return mReader.markSupported(); } @Override public void mark (int readAheadLimit) throws IOException { synchronized (lock) { init(); mReader.mark(readAheadLimit); } } @Override public void close() throws IOException { synchronized (lock) { if (mStream==null) { return; } if (mReader!=null) { mReader.close(); } mStream.close(); mStream=null; } } // INSTANCE METHODS. /** * Returns the receiver's decoding strategy. * * @return The strategy, which may be null if none was specified. */ public DecodingStrategy getDecodingStrategy() { return mDecodingStrategy; } /** * Returns the receiver's requested signature/charset. * * @return The requested signature/charset, which may be null if * none was requested. */ public SignatureCharset getRequestedSignatureCharset() { return mRequestedSignatureCharset; } /** * Returns the receiver's actual signature/charset (that is, the * one in use to decode the stream). * * @return The signature/charset, which may be null if the default * JVM charset is used. * * @throws IOException Thrown if an I/O error occurs. */ public SignatureCharset getSignatureCharset() throws IOException { synchronized (lock) { init(); return mSignatureCharset; } } /** * Initializes the receiver. * * @throws IOException Thrown if an I/O error occurs. */ private void init() throws IOException { if (mStream==null) { throw new IOException(Messages.STREAM_CLOSED.getText()); } if (mReader!=null) { return; } if (getDecodingStrategy()==null) { mSignatureCharset=getRequestedSignatureCharset(); } else { byte[] consumed=new byte[Signature.getLongestLength()]; int count=mStream.read(consumed); if (count==-1) { count=0; } byte[] header=Arrays.copyOf(consumed,count); mSignatureCharset=getDecodingStrategy().getPrefixMatch(header); mStream.unread(header); } if ((mSignatureCharset!=null) && (!mSignatureCharset.isSupported())) { mSignatureCharset=null; } if (mSignatureCharset!=null) { int len=mSignatureCharset.getSignature().getLength(); long left=len; long skipped=1; while ((left>0) && (skipped>0)) { skipped=mStream.skip(left); left-=skipped; } mReader=new InputStreamReader (mStream,mSignatureCharset.getCharset().getCharset()); } else { mReader=new InputStreamReader(mStream); } } }