/* * Copyright 2013 Skynav, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SKYNAV, INC. AND ITS CONTRIBUTORS “AS IS” AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL SKYNAV, INC. OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

package com.skynav.xml.helpers;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

import com.skynav.xml.helpers.Sniffer;

/**
 * Table-driven tests for {@link Sniffer#sniff}.
 *
 * <p>Each positive test row has the shape
 * <code>{ bom, encoding, quote, spacing }</code>:
 * <ul>
 * <li><code>bom</code> - an <code>int[]</code> of byte values written before the XML
 *     declaration, or <code>null</code> for no byte order mark;</li>
 * <li><code>encoding</code> - the value of the <code>encoding</code> pseudo-attribute; an
 *     empty string omits the pseudo-attribute altogether;</li>
 * <li><code>quote</code> - the quote character placed around pseudo-attribute values;</li>
 * <li><code>spacing</code> - the whitespace placed around each <code>=</code> sign.</li>
 * </ul>
 */
public class SnifferTestCase {

    /** Whitespace variants placed around the '=' of a pseudo-attribute. */
    private enum SpaceAroundEquals {
        None,
        One,
        Mixed
    }

    /** Marker for "no byte order mark". */
    private static final int[] bomEmpty = null;

    /**
     * Charset handed to the sniffer as its second argument by the ASCII tests;
     * left as <code>null</code> if the lookup fails.
     */
    private static Charset asciiCharset;

    static {
        try {
            asciiCharset = Charset.forName("US-ASCII");
        } catch (RuntimeException e) {
            // Deliberate best effort: without the charset the ASCII tests pass null instead.
            asciiCharset = null;
        }
    }

    private static final Object[][] asciiTests = new Object[][] {
        // no encoding
        { bomEmpty, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "us-ascii", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "us-ascii", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "us-ascii", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "us-ascii", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "us-ascii", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "us-ascii", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "US-ASCII", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Us-Ascii", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Us-AsCiI", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "ascii", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffASCII() throws Exception {
        for (int testIndex = 0; testIndex < asciiTests.length; ++testIndex) {
            checkSniff(testIndex, "US-ASCII", asciiTests[testIndex], asciiCharset, "US-ASCII");
        }
    }

    private static final int[] bomUTF8 = new int[] { 0xEF, 0xBB, 0xBF };

    private static final Object[][] utf8Tests = new Object[][] {
        // BOM only
        { bomUTF8, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "utf-8", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "utf-8", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "utf-8", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "UTF-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Utf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "uTf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        // both BOM and encoding
        { bomUTF8, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF8, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomUTF8, "utf-8", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomUTF8, "utf-8", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomUTF8, "utf-8", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomUTF8, "utf-8", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomUTF8, "UTF-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF8, "Utf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF8, "uTf-8", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffUTF8() throws Exception {
        for (int testIndex = 0; testIndex < utf8Tests.length; ++testIndex) {
            checkSniff(testIndex, "UTF-8", utf8Tests[testIndex], null, "UTF-8");
        }
    }

    private static final int[] bomUTF16LE = new int[] { 0xFF, 0xFE };

    private static final Object[][] utf16LETests = new Object[][] {
        // BOM only
        { bomUTF16LE, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "UTF-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Utf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "uTf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        // both BOM and encoding
        { bomUTF16LE, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16LE, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomUTF16LE, "utf-16le", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomUTF16LE, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomUTF16LE, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomUTF16LE, "utf-16le", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomUTF16LE, "UTF-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16LE, "Utf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16LE, "uTf-16le", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffUTF16LE() throws Exception {
        for (int testIndex = 0; testIndex < utf16LETests.length; ++testIndex) {
            checkSniff(testIndex, "UTF-16LE", utf16LETests[testIndex], null, "UTF-16LE");
        }
    }

    private static final int[] bomUTF16BE = new int[] { 0xFE, 0xFF };

    private static final Object[][] utf16BETests = new Object[][] {
        // BOM only
        { bomUTF16BE, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "UTF-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Utf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "uTf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        // both BOM and encoding
        { bomUTF16BE, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16BE, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomUTF16BE, "utf-16be", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomUTF16BE, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomUTF16BE, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomUTF16BE, "utf-16be", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomUTF16BE, "UTF-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16BE, "Utf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF16BE, "uTf-16be", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffUTF16BE() throws Exception {
        for (int testIndex = 0; testIndex < utf16BETests.length; ++testIndex) {
            checkSniff(testIndex, "UTF-16BE", utf16BETests[testIndex], null, "UTF-16BE");
        }
    }

    private static final int[] bomUTF32LE = new int[] { 0xFF, 0xFE, 0x00, 0x00 };

    private static final Object[][] utf32LETests = new Object[][] {
        // BOM only
        { bomUTF32LE, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "UTF-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Utf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "uTf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        // both BOM and encoding
        { bomUTF32LE, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32LE, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomUTF32LE, "utf-32le", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomUTF32LE, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomUTF32LE, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomUTF32LE, "utf-32le", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomUTF32LE, "UTF-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32LE, "Utf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32LE, "uTf-32le", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffUTF32LE() throws Exception {
        for (int testIndex = 0; testIndex < utf32LETests.length; ++testIndex) {
            checkSniff(testIndex, "UTF-32LE", utf32LETests[testIndex], null, "UTF-32LE");
        }
    }

    private static final int[] bomUTF32BE = new int[] { 0x00, 0x00, 0xFE, 0xFF };

    private static final Object[][] utf32BETests = new Object[][] {
        // BOM only
        { bomUTF32BE, "", Character.valueOf('\"'), SpaceAroundEquals.None },
        // encoding only
        { bomEmpty, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomEmpty, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomEmpty, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomEmpty, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomEmpty, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomEmpty, "UTF-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "Utf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomEmpty, "uTf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        // both BOM and encoding
        { bomUTF32BE, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32BE, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.One },
        { bomUTF32BE, "utf-32be", Character.valueOf('\"'), SpaceAroundEquals.Mixed },
        { bomUTF32BE, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.None },
        { bomUTF32BE, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.One },
        { bomUTF32BE, "utf-32be", Character.valueOf('\''), SpaceAroundEquals.Mixed },
        { bomUTF32BE, "UTF-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32BE, "Utf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
        { bomUTF32BE, "uTf-32be", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffUTF32BE() throws Exception {
        for (int testIndex = 0; testIndex < utf32BETests.length; ++testIndex) {
            checkSniff(testIndex, "UTF-32BE", utf32BETests[testIndex], null, "UTF-32BE");
        }
    }

    private static final Object[][] otherEncodingSansBOMTests = new Object[][] {
        // encoding only
        { bomEmpty, "ISO-8859-1", Character.valueOf('\"'), SpaceAroundEquals.None },
    };

    @Test
    public void testSniffOther() throws Exception {
        for (int testIndex = 0; testIndex < otherEncodingSansBOMTests.length; ++testIndex) {
            Object[] test = otherEncodingSansBOMTests[testIndex];
            // The declared encoding is both the byte encoding used and the expected charset name.
            String encoding = (String) test[1];
            checkSniff(testIndex, encoding, test, null, encoding);
        }
    }

    private static final String[] invalidXMLDeclarationTests = new String[] {
        "",                         // missing xml declaration prefix
        "\u0000xml?>",              // invalid xml declaration prefix
        "?xml?>",                   // invalid xml declaration prefix
        "<?xml?>",                  // missing encoding pseudo-attribute
        "<?xml encoding?>",         // missing equals
        "<?xml encoding=?>",        // missing quoted encoding value
        "<?xml encoding=\"?>",      // missing encoding value and terminating quote
        "<?xml encoding=\'?>",      // missing encoding value and terminating quote
        "<?xml encoding=\"1\"?>",   // invalid initial character of encoding name (must be ascii letter)
        "<?xml encoding=\"_\"?>",   // invalid initial character of encoding name (must be ascii letter)
        "<?xml encoding=\"A+\"?>",  // invalid following character of encoding name (must be ascii letter, digit, [._-])
    };

    @Test
    public void testSniffInvalidXMLDeclaration() throws Exception {
        for (int testIndex = 0; testIndex < invalidXMLDeclarationTests.length; ++testIndex) {
            String label = "Test Index " + testIndex;
            ByteBuffer bb = makeByteBuffer("US-ASCII", invalidXMLDeclarationTests[testIndex]);
            assertNotNull(label, bb);
            Charset cs = Sniffer.sniff(bb, null);
            assertNull(label, cs);
        }
    }

    /**
     * Runs one positive table row through the sniffer and checks the name of the returned charset.
     *
     * @param testIndex index of the row within its table, reported in every failure message
     * @param xmlDeclEncoding name of the charset used to encode the XML declaration into bytes
     * @param test table row of the form <code>{ bom, encoding, quote, spacing }</code>
     * @param secondArgument charset passed as the second argument of <code>Sniffer.sniff</code>
     *        (may be <code>null</code>)
     * @param expectedName expected value of {@link Charset#name()} for the sniffed charset
     * @throws Exception if the sniffer throws; declared broadly because the test methods do likewise
     */
    private static void checkSniff(int testIndex, String xmlDeclEncoding, Object[] test,
            Charset secondArgument, String expectedName) throws Exception {
        String label = "Test Index " + testIndex;
        ByteBuffer bb = makeByteBuffer(xmlDeclEncoding, test);
        assertNotNull(label, bb);
        Charset cs = Sniffer.sniff(bb, secondArgument);
        assertNotNull(label, cs);
        // JUnit order is (message, expected, actual); keeping it makes failure reports read correctly.
        assertEquals(label, expectedName, cs.name());
    }

    /**
     * Appends the whitespace that goes on one side of an '=' sign.
     *
     * <p><code>None</code> appends nothing and <code>One</code> appends a single space.
     * <code>Mixed</code> appends a newline before the sign and space, tab, space after it.
     *
     * @param sb buffer being built
     * @param spaceAround whitespace variant to emit
     * @param beforeEquals <code>true</code> for the left side of the sign, <code>false</code> for
     *        the right side
     */
    private static void addSpaceAroundEquals(StringBuilder sb, SpaceAroundEquals spaceAround, boolean beforeEquals) {
        if (spaceAround == SpaceAroundEquals.One) {
            sb.append(' ');
        } else if (spaceAround == SpaceAroundEquals.Mixed) {
            if (beforeEquals) {
                sb.append('\n');
            } else {
                sb.append(' ');
                sb.append('\t');
                sb.append(' ');
            }
        }
    }

    /** Appends <code> name=QvalueQ</code> with the requested spacing and quote character Q. */
    private static void addPseudoAttribute(StringBuilder sb, String name, String value, char quote,
            SpaceAroundEquals spaceAround) {
        sb.append(' ').append(name);
        addSpaceAroundEquals(sb, spaceAround, true);
        sb.append('=');
        addSpaceAroundEquals(sb, spaceAround, false);
        sb.append(quote);
        sb.append(value);
        sb.append(quote);
    }

    /**
     * Builds the text of an XML declaration.
     *
     * @param version value of the <code>version</code> pseudo-attribute, or <code>null</code> to
     *        omit it
     * @param encoding value of the <code>encoding</code> pseudo-attribute; <code>null</code> or
     *        empty omits it
     * @param quote quote character placed around each value
     * @param spaceAround whitespace variant placed around each '=' sign
     * @return the declaration, starting with <code>&lt;?xml</code> and ending with <code>?&gt;</code>
     */
    private static String makeXMLDeclaration(String version, String encoding, char quote, SpaceAroundEquals spaceAround) {
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml");
        if (version != null) {
            addPseudoAttribute(sb, "version", version, quote, spaceAround);
        }
        if ((encoding != null) && (encoding.length() != 0)) {
            addPseudoAttribute(sb, "encoding", encoding, quote, spaceAround);
        }
        sb.append("?>");
        return sb.toString();
    }

    /**
     * Builds the input buffer for one positive table row: the optional BOM bytes followed by a
     * version 1.0 XML declaration encoded in <code>xmlDeclEncoding</code>.
     *
     * @param xmlDeclEncoding name of the charset used to encode the declaration text
     * @param test table row of the form <code>{ bom, encoding, quote, spacing }</code>
     * @return the wrapped bytes, or <code>null</code> if <code>xmlDeclEncoding</code> is not a
     *         supported charset
     */
    private static ByteBuffer makeByteBuffer(String xmlDeclEncoding, Object[] test) {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        int[] bom = (int[]) test[0];
        if (bom != null) {
            for (int b : bom) {
                // write(int) keeps only the low eight bits, so insist on a genuine byte value.
                assert (b >= 0) && (b < 256) : "BOM value out of byte range: " + b;
                os.write(b);
            }
        }
        String xmlDecl = makeXMLDeclaration("1.0", (String) test[1], (Character) test[2], (SpaceAroundEquals) test[3]);
        try {
            byte[] bytes = xmlDecl.getBytes(Charset.forName(xmlDeclEncoding));
            os.write(bytes, 0, bytes.length);
        } catch (UnsupportedCharsetException e) {
            // Callers assert the result is non-null, so an unsupported charset fails the test there.
            return null;
        }
        return ByteBuffer.wrap(os.toByteArray());
    }

    /**
     * Encodes a literal declaration string, with no BOM, into a byte buffer.
     *
     * @param xmlDeclEncoding name of the charset used to encode <code>xmlDecl</code>
     * @param xmlDecl text to encode verbatim
     * @return the wrapped bytes, or <code>null</code> if <code>xmlDeclEncoding</code> is not a
     *         supported charset
     */
    private static ByteBuffer makeByteBuffer(String xmlDeclEncoding, String xmlDecl) {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        try {
            byte[] bytes = xmlDecl.getBytes(Charset.forName(xmlDeclEncoding));
            os.write(bytes, 0, bytes.length);
        } catch (UnsupportedCharsetException e) {
            // Callers assert the result is non-null, so an unsupported charset fails the test there.
            return null;
        }
        return ByteBuffer.wrap(os.toByteArray());
    }

}