// Copied from Slipstream Mod Manager 1.4. // (Excerpts from ModUtilities) package com.ftloverdrive.util; import java.util.Arrays; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.util.LinkedHashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class TextUtilities { /** * Determines text encoding for an InputStream and decodes its bytes as a string. * * CR and CR-LF line endings will be normalized to LF. * * @param is a stream to read * @param description how error messages should refer to the stream, or null */ public static DecodeResult decodeText( InputStream is, String description ) throws IOException { String result = null; byte[] buf = new byte[4096]; int len; ByteArrayOutputStream tmpData = new ByteArrayOutputStream(); while ( (len = is.read(buf)) >= 0 ) { tmpData.write( buf, 0, len ); } byte[] allBytes = tmpData.toByteArray(); tmpData.reset(); Map<byte[],String> boms = new LinkedHashMap<byte[],String>(); boms.put( new byte[] {(byte)0xEF,(byte)0xBB,(byte)0xBF}, "UTF-8" ); boms.put( new byte[] {(byte)0xFF,(byte)0xFE}, "UTF-16LE" ); boms.put( new byte[] {(byte)0xFE,(byte)0xFF}, "UTF-16BE" ); String encoding = null; byte[] bom = null; for ( Map.Entry<byte[],String> entry : boms.entrySet() ) { byte[] tmpBom = entry.getKey(); byte[] firstBytes = Arrays.copyOfRange( allBytes, 0, tmpBom.length ); if ( Arrays.equals( tmpBom, firstBytes ) ) { encoding = entry.getValue(); bom = tmpBom; break; } } if ( encoding != null ) { // This may throw CharacterCodingException. CharsetDecoder decoder = Charset.forName( encoding ).newDecoder(); ByteBuffer byteBuffer = ByteBuffer.wrap( allBytes, bom.length, allBytes.length-bom.length ); result = decoder.decode( byteBuffer ).toString(); } else { ByteBuffer byteBuffer = ByteBuffer.wrap( allBytes ); Map<String,Exception> errorMap = new LinkedHashMap<String,Exception>(); for ( String guess : new String[] {"UTF-8", "windows-1252"} ) { try { byteBuffer.rewind(); byteBuffer.limit( allBytes.length ); CharsetDecoder decoder = Charset.forName( guess ).newDecoder(); result = decoder.decode( byteBuffer ).toString(); encoding = guess; break; } catch ( CharacterCodingException e ) { errorMap.put( guess, e ); } } if ( encoding == null ) { // All guesses failed!? String msg = String.format( "Could not guess encoding for %s.", (description!=null ? "\""+description+"\"" : "a file") ); for ( Map.Entry<String,Exception> entry : errorMap.entrySet() ) { msg += String.format( "\nFailed to decode as %s: %s", entry.getKey(), entry.getValue() ); } throw new IOException( msg ); } } // Determine the original line endings. int eol = DecodeResult.EOL_NONE; Matcher m = Pattern.compile( "(\r(?!\n))|((?<!\r)\n)|(\r\n)" ).matcher( result ); if ( m.find() ) { if ( m.group(3) != null ) eol = DecodeResult.EOL_CRLF; else if ( m.group(2) != null ) eol = DecodeResult.EOL_LF; else if ( m.group(1) != null ) eol = DecodeResult.EOL_CR; } result = result.replaceAll( "\r(?!\n)|\r\n", "\n" ); return new DecodeResult( result, encoding, eol, bom ); } /** * A holder for results from decodeText(). * * text - The decoded string. * encoding - The encoding used. * eol - A constant describing the original line endings. * bom - The BOM bytes found, or null. */ public static class DecodeResult { public static final int EOL_NONE = 0; public static final int EOL_CRLF = 1; public static final int EOL_LF = 2; public static final int EOL_CR = 3; public final String text; public final String encoding; public final int eol; public final byte[] bom; public DecodeResult( String text, String encoding, int eol, byte[] bom ) { this.text = text; this.encoding = encoding; this.eol = eol; this.bom = bom; } public String getEOLName() { if ( eol == EOL_CRLF ) return "CR-LF"; if ( eol == EOL_LF ) return "LF"; if ( eol == EOL_CR ) return "CR"; return "None"; } } }