/*!
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2013 Pentaho Corporation.. All rights reserved.
*/
package org.pentaho.platform.dataaccess.datasource.wizard.csv;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
/**
 * The <code>UnicodeBOMInputStream</code> class wraps any <code>InputStream</code> and detects the presence of any
 * Unicode BOM (Byte Order Mark) at its beginning, as defined by <a href="http://www.faqs.org/rfcs/rfc3629.html">RFC
 * 3629 - UTF-8, a transformation format of ISO 10646</a>.
 *
 * <p>The <a href="http://www.unicode.org/unicode/faq/utf_bom.html">Unicode FAQ</a> defines 5 types of BOMs:</p>
 * <ul>
 * <li><pre>00 00 FE FF = UTF-32, big-endian</pre></li>
 * <li><pre>FF FE 00 00 = UTF-32, little-endian</pre></li>
 * <li><pre>FE FF = UTF-16, big-endian</pre></li>
 * <li><pre>FF FE = UTF-16, little-endian</pre></li>
 * <li><pre>EF BB BF = UTF-8</pre></li>
 * </ul>
 *
 * <p>Detection only peeks at the stream: the bytes that were inspected are pushed back, so a freshly constructed
 * instance still delivers the BOM bytes to its reader. Use the {@link #getBOM()} method to know whether a BOM has
 * been detected or not, and the {@link #skipBOM()} method to remove the detected BOM from the stream before
 * reading.</p>
 *
 * @deprecated use classes from the ICU project instead, e.g.
 * {@link com.ibm.icu.text.CharsetDetector#getReader(java.io.InputStream, String)}
 */
@Deprecated
public class UnicodeBOMInputStream extends InputStream {

  /**
   * Type safe enumeration class that describes the different types of Unicode BOMs.
   */
  public static final class BOM {

    /**
     * NONE.
     */
    public static final BOM NONE = new BOM( new byte[] { }, "NONE" );

    /**
     * UTF-8 BOM (EF BB BF).
     */
    public static final BOM UTF_8 = new BOM( new byte[] { (byte) 0xEF,
      (byte) 0xBB,
      (byte) 0xBF },
      "UTF-8" );

    /**
     * UTF-16, little-endian (FF FE).
     */
    public static final BOM UTF_16_LE = new BOM( new byte[] { (byte) 0xFF,
      (byte) 0xFE },
      "UTF-16 little-endian" );

    /**
     * UTF-16, big-endian (FE FF).
     */
    public static final BOM UTF_16_BE = new BOM( new byte[] { (byte) 0xFE,
      (byte) 0xFF },
      "UTF-16 big-endian" );

    /**
     * UTF-32, little-endian (FF FE 00 00).
     */
    public static final BOM UTF_32_LE = new BOM( new byte[] { (byte) 0xFF,
      (byte) 0xFE,
      (byte) 0x00,
      (byte) 0x00 },
      "UTF-32 little-endian" );

    /**
     * UTF-32, big-endian (00 00 FE FF).
     */
    public static final BOM UTF_32_BE = new BOM( new byte[] { (byte) 0x00,
      (byte) 0x00,
      (byte) 0xFE,
      (byte) 0xFF },
      "UTF-32 big-endian" );

    /**
     * Returns a <code>String</code> representation of this <code>BOM</code> value.
     *
     * @return the human readable description given at construction time.
     */
    @Override
    public final String toString() {
      return description;
    }

    /**
     * Returns the bytes corresponding to this <code>BOM</code> value.
     *
     * @return a fresh copy of the BOM bytes; modifying it does not affect this <code>BOM</code>.
     */
    public final byte[] getBytes() {
      // Defensive copy: the internal array must never leak to callers.
      return bytes.clone();
    }

    private BOM( final byte[] bom, final String description ) {
      assert ( bom != null ) : "invalid BOM: null is not allowed";
      assert ( description != null ) : "invalid description: null is not allowed";
      assert ( description.length() != 0 ) : "invalid description: empty string is not allowed";
      this.bytes = bom;
      this.description = description;
    }

    // Raw BOM bytes; read directly by the enclosing class, never handed out without copying.
    final byte[] bytes;
    private final String description;
  } // BOM

  /**
   * Longest BOM that can be detected (UTF-32), i.e. the number of bytes peeked from the stream.
   */
  private static final int MAX_BOM_LENGTH = 4;

  /**
   * Candidates in matching order. The UTF-32 entries must come before the UTF-16 ones because the UTF-32 LE BOM
   * (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE).
   */
  private static final BOM[] DETECTION_ORDER = {
    BOM.UTF_32_LE, BOM.UTF_32_BE, BOM.UTF_8, BOM.UTF_16_LE, BOM.UTF_16_BE
  };

  /**
   * Constructs a new <code>UnicodeBOMInputStream</code> that wraps the specified <code>InputStream</code>.
   * Up to four bytes are read from the stream to detect the BOM and are then pushed back, so they remain
   * available to subsequent reads.
   *
   * @param inputStream an <code>InputStream</code>.
   * @throws NullPointerException when <code>inputStream</code> is <code>null</code>.
   * @throws IOException          on reading from the specified <code>InputStream</code> when trying to detect the
   *                              Unicode BOM.
   */
  public UnicodeBOMInputStream( final InputStream inputStream ) throws NullPointerException,
    IOException {
    if ( inputStream == null ) {
      throw new NullPointerException( "invalid input stream: null is not allowed" );
    }

    in = new PushbackInputStream( inputStream, MAX_BOM_LENGTH );

    final byte[] head = new byte[ MAX_BOM_LENGTH ];
    // A single read() may legally return fewer bytes than requested even though more are coming
    // (sockets, concatenated or chunked streams), so keep reading until the buffer is full or the stream ends.
    int read = 0;
    while ( read < head.length ) {
      final int count = in.read( head, read, head.length - read );
      if ( count <= 0 ) {
        break;
      }
      read += count;
    }

    this.bom = detect( head, read );

    // Detection must not consume anything: hand the peeked bytes back to the stream.
    if ( read > 0 ) {
      in.unread( head, 0, read );
    }
  }

  /**
   * Returns the first BOM of {@link #DETECTION_ORDER} that is a prefix of the peeked bytes.
   *
   * @param head   the bytes peeked from the beginning of the stream.
   * @param length the number of valid bytes in <code>head</code>.
   * @return the matching <code>BOM</code>, or {@link BOM#NONE} when nothing matches.
   */
  private static BOM detect( final byte[] head, final int length ) {
    for ( final BOM candidate : DETECTION_ORDER ) {
      if ( startsWith( head, length, candidate.bytes ) ) {
        return candidate;
      }
    }
    return BOM.NONE;
  }

  /**
   * Tells whether the first <code>length</code> bytes of <code>head</code> begin with <code>prefix</code>.
   */
  private static boolean startsWith( final byte[] head, final int length, final byte[] prefix ) {
    if ( length < prefix.length ) {
      return false;
    }
    for ( int i = 0; i < prefix.length; i++ ) {
      if ( head[ i ] != prefix[ i ] ) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the <code>BOM</code> that was detected in the wrapped <code>InputStream</code> object.
   *
   * @return a <code>BOM</code> value.
   */
  public final BOM getBOM() {
    // BOM type is immutable.
    return bom;
  }

  /**
   * Skips the <code>BOM</code> that was found in the wrapped <code>InputStream</code> object. Only the first call
   * has an effect; later calls return immediately.
   *
   * <p>The skip is performed from the current stream position, so this method must be called before any byte
   * has been read from this stream; otherwise payload bytes are discarded instead of the BOM.</p>
   *
   * @return this <code>UnicodeBOMInputStream</code>.
   * @throws IOException when trying to skip the BOM from the wrapped <code>InputStream</code> object.
   */
  public final synchronized UnicodeBOMInputStream skipBOM() throws IOException {
    if ( !skipped ) {
      // skip() is allowed to skip fewer bytes than asked for; loop until done or no progress is made.
      long remaining = bom.bytes.length;
      while ( remaining > 0 ) {
        final long step = in.skip( remaining );
        if ( step <= 0 ) {
          break;
        }
        remaining -= step;
      }
      skipped = true;
    }
    return this;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int read() throws IOException {
    return in.read();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int read( final byte[] b ) throws IOException,
    NullPointerException {
    return in.read( b, 0, b.length );
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int read( final byte[] b,
                   final int off,
                   final int len ) throws IOException,
    NullPointerException {
    return in.read( b, off, len );
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public long skip( final long n ) throws IOException {
    return in.skip( n );
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int available() throws IOException {
    return in.available();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void close() throws IOException {
    in.close();
  }

  /**
   * {@inheritDoc}
   * <p>Delegates to the internal <code>PushbackInputStream</code>.</p>
   */
  @Override
  public synchronized void mark( final int readlimit ) {
    in.mark( readlimit );
  }

  /**
   * {@inheritDoc}
   * <p>Delegates to the internal <code>PushbackInputStream</code>.</p>
   */
  @Override
  public synchronized void reset() throws IOException {
    in.reset();
  }

  /**
   * {@inheritDoc}
   * <p>Delegates to the internal <code>PushbackInputStream</code>.</p>
   */
  @Override
  public boolean markSupported() {
    return in.markSupported();
  }

  // Wrapped stream; the pushback buffer holds the bytes peeked during BOM detection.
  private final PushbackInputStream in;
  // BOM detected at construction time, BOM.NONE when there is none.
  private final BOM bom;
  // Guards skipBOM() so that the BOM is skipped at most once.
  private boolean skipped = false;

} // UnicodeBOMInputStream