package org.archive.wayback.replay.charset;

import java.io.IOException;

import org.archive.wayback.core.Resource;

/**
 * {@link EncodingSniffer} that peeks at the beginning of the content for
 * Byte Order Mark (BOM) bytes.
 * <p>This is step 3 of the character encoding sniffing
 * prescribed by WHATWG.</p>
 */
public class ByteOrderMarkSniffer extends BaseEncodingSniffer {
	/** Length in bytes of the longest BOM recognized here (the UTF-8 BOM). */
	public static final int MAX_BOM_LEN = 3;

	/**
	 * Looks at up to the first {@link #MAX_BOM_LEN} bytes of {@code resource}
	 * for a byte order mark, then rewinds the resource to where it was.
	 *
	 * @param resource content to inspect; it is marked before reading and
	 *        reset afterwards
	 * @return {@code "UTF-16BE"}, {@code "UTF-16LE"} or {@code "UTF-8"} when
	 *         the corresponding BOM is found; {@code null} when no BOM is
	 *         found or when reading/resetting the resource fails
	 */
	@Override
	public String sniff(Resource resource) {
		byte[] bom = new byte[MAX_BOM_LEN];
		int filled = 0;
		resource.mark(MAX_BOM_LEN);
		try {
			try {
				// A single read() may return fewer bytes than requested even
				// when more data follows, so keep reading until the buffer is
				// full or the stream has nothing more to give.
				while (filled < MAX_BOM_LEN) {
					int n = resource.read(bom, filled, MAX_BOM_LEN - filled);
					if (n <= 0) {
						break;
					}
					filled += n;
				}
			} finally {
				// Rewind even when read() failed, so the resource is not left
				// partially consumed for whoever uses it next.
				resource.reset();
			}
		} catch (IOException ex) {
			return null;
		}

		// Only compare bytes that were actually read.
		if (filled >= 2) {
			if (bom[0] == (byte)0xFE && bom[1] == (byte)0xFF) {
				return "UTF-16BE";
			}
			if (bom[0] == (byte)0xFF && bom[1] == (byte)0xFE) {
				return "UTF-16LE";
			}
		}
		if (filled >= 3
				&& bom[0] == (byte)0xEF
				&& bom[1] == (byte)0xBB
				&& bom[2] == (byte)0xBF) {
			return "UTF-8";
		}
		return null;
	}
}