package de.axone.tools;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

import de.axone.data.Charsets;

/**
 * Helpers for detecting and repairing strings in which every char holds one
 * raw byte of UTF-8 encoded text instead of a decoded character.
 */
public class Encodings {

    /** Largest char value that can stand for a single raw byte. */
    private static final int MAX_BYTE_CHAR = 0xFF;

    /**
     * Heuristically check whether a string contains a UTF-8 two-byte
     * sequence stored as two separate chars.
     *
     * Only the two-byte pattern (110xxxxx 10xxxxxx) is looked for; longer
     * sequences are not detected on their own.
     *
     * @param string the string to inspect, must not be null
     * @return true if at least one such pair of adjacent chars is found
     */
    public static boolean isWrongEncodedUtf8( String string ){

        // The last char cannot start a two-byte sequence, so stop one early.
        for( int i = 0; i < string.length()-1; i++ ){

            int lead = string.charAt( i );
            int next = string.charAt( i+1 );

            // Chars above 0xFF cannot be raw bytes. Without this check their
            // low bits could match the masks below by accident.
            if( lead > MAX_BYTE_CHAR || next > MAX_BYTE_CHAR ){
                continue;
            }

            // Known pattern for a UTF-8 two-byte sequence
            if( (lead & 0xE0) == 0xC0 && (next & 0xC0) == 0x80 ){
                return true;
            }
        }

        return false;
    }

    /**
     * Take one string which may be wrongly encoded and convert
     * it to a usable String.
     *
     * If {@link #isWrongEncodedUtf8(String)} reports a match, every char is
     * taken as one byte and the resulting bytes are decoded using
     * {@code Charsets.UTF8}. Otherwise the string is returned unchanged.
     * A string containing any char above 0xFF is also returned unchanged,
     * because such a char cannot be narrowed to a byte without losing data.
     *
     * @param string the string to repair, must not be null
     * @return the converted string, or the given string if no conversion applies
     * @throws IOException if creating or reading the decoding reader fails
     */
    public static String convertToUsableString( String string ) throws IOException{

        if( !isWrongEncodedUtf8( string ) ){
            return string;
        }

        byte[] buffer = new byte[ string.length() ];
        for( int i = 0; i < buffer.length; i++ ){
            char c = string.charAt( i );
            if( c > MAX_BYTE_CHAR ){
                // Not a pure byte-per-char string: converting would corrupt it.
                return string;
            }
            buffer[ i ] = (byte)c;
        }

        Reader rIn = new InputStreamReader( new ByteArrayInputStream( buffer ), Charsets.UTF8 );
        try {
            StringBuilder result = new StringBuilder( buffer.length );
            // buffer.length is at least 2 here, so the chunk is never empty.
            char[] chunk = new char[ buffer.length ];
            int l;
            // A single read() is not guaranteed to deliver everything,
            // so keep reading until end of stream.
            while( (l = rIn.read( chunk )) != -1 ){
                result.append( chunk, 0, l );
            }
            return result.toString();
        } finally {
            rIn.close();
        }
    }
}