package org.archive.wayback.replay.charset; import java.io.IOException; import junit.framework.TestCase; import org.archive.io.warc.TestWARCReader; import org.archive.io.warc.TestWARCRecordInfo; import org.archive.io.warc.WARCRecord; import org.archive.wayback.core.Resource; import org.archive.wayback.resourcestore.resourcefile.WarcResource; public class ByteOrderMarkSnifferTest extends TestCase { ByteOrderMarkSniffer cut; Resource resource; protected void setUp() throws Exception { cut = new ByteOrderMarkSniffer(); } protected void setupResource(byte[] payload) throws IOException { TestWARCRecordInfo recinfo = TestWARCRecordInfo.createHttpResponse("text/html", payload); TestWARCReader ar = new TestWARCReader(recinfo); WARCRecord rec = ar.get(0); resource = new WarcResource(rec, ar); resource.parseHeaders(); } public void testUTF16BE() throws Exception { setupResource(new byte[] { (byte)0xFE, (byte)0xFF, '<', 'H', 'T', 'M', 'L', '>', '<', '/', 'H', 'T', 'M', 'L', '>' }); String detected = cut.sniff(resource); assertEquals("UTF-16BE", detected); } public void testUTF16LE() throws Exception { setupResource(new byte[] { (byte)0xFF, (byte)0xFE, '<', 'H', 'T', 'M', 'L', '>', '<', '/', 'H', 'T', 'M', 'L', '>' }); String detected = cut.sniff(resource); assertEquals("UTF-16LE", detected); } public void testUTF8() throws Exception { setupResource(new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF, '<', 'H', 'T', 'M', 'L', '>', '<', '/', 'H', 'T', 'M', 'L', '>' }); String detected = cut.sniff(resource); assertEquals("UTF-8", detected); } public void testNoBOM() throws Exception { setupResource(new byte[] { '<', 'H', 'T', 'M', 'L', '>', '<', '/', 'H', 'T', 'M', 'L', '>' }); String detected = cut.sniff(resource); assertNull(detected); } }