// BlogBridge -- RSS feed reader, manager, and web based service // Copyright (C) 2002-2006 by R. Pito Salas // // This program is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free Software Foundation; // either version 2 of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU General Public License for more details. // // You should have received a copy of the GNU General Public License along with this program; // if not, write to the Free Software Foundation, Inc., 59 Temple Place, // Suite 330, Boston, MA 02111-1307 USA // // Contact: R. Pito Salas // mailto:pitosalas@users.sourceforge.net // More information: about BlogBridge // http://www.blogbridge.com // http://sourceforge.net/projects/blogbridge // // $Id: TestEncodingDetector.java,v 1.3 2006/01/08 05:28:19 kyank Exp $ // package com.salas.bb.utils.xml; import junit.framework.TestCase; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; /** * @see EncodingDetector */ public class TestEncodingDetector extends TestCase { /** * Tests normal xml without BOM and declaration header. */ public void testNormal() { check("<a></a>", "UTF-8", '<'); check("<xmla></xmla>", "UTF-8", '<'); } /** * Tests BOM-based detection. */ public void testBOM() { check(new String(new char[] { (char)0xff, (char)0xfe, ' ', 0, '1', 0}), "UTF-16", ' '); check(new String(new char[] { (char)0xfe, (char)0xff, ' ', 0, '1', 0}), "UTF-16", ' '); check(new String(new char[] { (char)0xef, (char)0xbb, (char)0xbf, '1', 0}), "UTF-8", '1'); } /** * Tests detection based on bytes order. */ public void testBytes() { check(new String(new char[] { (char)0, '<', (char)0, 'a', (char)0, '>' }), "UnicodeBig", (char)0); check(new String(new char[] { '<', (char)0, 'a', (char)0, '>', (char)0 }), "UnicodeLittle", '<'); } /** * Tests declaration-based detection. */ public void testDeclaration() { check("<?xml version='1.0'?>", "UTF-8", '<'); check("<?xml version=\"1.0\"?>", "UTF-8", '<'); check("<?xml encoding='windows-1251' version=\"1.0\"?>", "windows-1251", '<'); check("<?xml encoding=\"windows-1251\" version=\"1.0\"?>", "windows-1251", '<'); check("<?xml encoding = \"windows-1251\" version=\"1.0\"?>", "windows-1251", '<'); check("<?xml version=\"1.0\" encoding='windows-1251'?>", "windows-1251", '<'); check("<?xml version=\"1.0\" encoding=\"windows-1251\"?>", "windows-1251", '<'); check("<?xml version=\"1.0\" encoding = \"windows-1251\"?>", "windows-1251", '<'); check("<?xml encoding=' windows-1251 ' ?>", "windows-1251", '<'); } /** * Tests reading of declaration header. * * @throws Exception in case of errors. */ public void testReadDeclarationHeader() throws Exception { assertFalse(EncodingDetector.readDeclarationHeader(null)); assertFalse(EncodingDetector.readDeclarationHeader(streamForString("<?xmla"))); assertFalse(EncodingDetector.readDeclarationHeader(streamForString("<?xmm "))); assertFalse(EncodingDetector.readDeclarationHeader(streamForString(" <?xml "))); assertFalse(EncodingDetector.readDeclarationHeader(streamForString("<? xml "))); assertFalse(EncodingDetector.readDeclarationHeader(streamForString("<?xml"))); assertTrue(EncodingDetector.readDeclarationHeader(streamForString("<?xml "))); } /** * Tests reading attribute values with optional spaces. * * @throws Exception in case of errors. */ public void testReadAttributeValue() throws Exception { InputStream is; StringBuffer buf = null; assertEquals(-1, EncodingDetector.readAttributeValue(null, buf)); // Empty stream is = streamForString(""); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValue(is, buf)); assertEquals(0, buf.length()); // Empty stream is = streamForString(" 'abc' "); buf = new StringBuffer(); assertEquals(' ', EncodingDetector.readAttributeValue(is, buf)); assertEquals("abc", buf.toString()); } /** * Tests reading of encoding attribute. * * @throws Exception in case of errors. */ public void testReadAttributeValueString() throws Exception { InputStream is; assertEquals(null, EncodingDetector.readAttributeValue(null, (String)null)); assertEquals(null, EncodingDetector.readAttributeValue(null, "encoding")); // Empty stream is = streamForString(""); assertEquals(null, EncodingDetector.readAttributeValue(is, (String)null)); assertEquals(null, EncodingDetector.readAttributeValue(is, "encoding")); // No attribute assertEquals(null, EncodingDetector.readAttributeValue(streamForString("encoding?>"), "encoding")); assertEquals(null, EncodingDetector.readAttributeValue(streamForString("encoding ?>"), "encoding")); assertEquals(null, EncodingDetector.readAttributeValue(streamForString("encodin='a' ?>"), "encoding")); assertEquals(null, EncodingDetector.readAttributeValue(streamForString("encoding enc='a'?>"), "encoding")); // Attribute present assertEquals("utf-8", EncodingDetector.readAttributeValue(streamForString("version='1.0' encoding='utf-8'?>"), "encoding")); assertEquals("utf-8", EncodingDetector.readAttributeValue(streamForString("version='1.0' encoding = 'utf-8' ?>"), "encoding")); assertEquals("utf-8", EncodingDetector.readAttributeValue(streamForString("version encoding = 'utf-8' ?>"), "encoding")); assertEquals("utf-8", EncodingDetector.readAttributeValue(streamForString("encoding = 'utf-8' version?>"), "encoding")); } /** * Tests reading of attribute values without spaces in the beginning. * * @throws Exception in case of errors. */ public void testReadAttributeValueNoSpace() throws Exception { InputStream is; StringBuffer buf; // Empty stream is = streamForString(""); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, -1, buf)); assertEquals(0, buf.length()); // Unexpected chars -- non-quotes: "a" is = streamForString(""); buf = new StringBuffer(); assertEquals('a', EncodingDetector.readAttributeValueNoSpace(is, 'a', buf)); assertEquals(0, buf.length()); // Unclosed quotes: "'abc" is = streamForString("abc"); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); assertEquals(0, buf.length()); // Unclosed quotes: '"abc' is = streamForString("abc"); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); assertEquals(0, buf.length()); // Success is = streamForString("abc'"); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); assertEquals("abc", buf.toString()); // Success is = streamForString("abc\""); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); assertEquals("abc", buf.toString()); // Success is = streamForString("a'b'c\""); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); assertEquals("a'b'c", buf.toString()); // Success is = streamForString("a\"b\"c'"); buf = new StringBuffer(); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); assertEquals("a\"b\"c", buf.toString()); // Success is = streamForString("abc' "); buf = new StringBuffer(); assertEquals(' ', EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); assertEquals("abc", buf.toString()); } /** * Tests skipping of attributes values. * * @throws Exception in case of errors. */ public void testSkipAttributeValue() throws Exception { InputStream is; StringBuffer buf = null; // Empty stream is = streamForString(""); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, -1, buf)); // Unexpected chars -- non-quotes: "a" is = streamForString(""); assertEquals('a', EncodingDetector.readAttributeValueNoSpace(is, 'a', buf)); // Unclosed quotes: "'abc" is = streamForString("abc"); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); // Unclosed quotes: '"abc' is = streamForString("abc"); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); // Success is = streamForString("abc'"); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); // Success is = streamForString("abc\""); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); // Success is = streamForString("a'b'c\""); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '"', buf)); // Success is = streamForString("a\"b\"c'"); assertEquals(-1, EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); // Success is = streamForString("abc' "); assertEquals(' ', EncodingDetector.readAttributeValueNoSpace(is, '\'', buf)); } /** * Tests skipping whitespaces. * * @throws Exception in case of errors. */ public void testSkipWhitespace() throws Exception { InputStream is; assertEquals(-1, EncodingDetector.skipWhitepace(null)); is = streamForString(""); assertEquals(-1, EncodingDetector.skipWhitepace(is)); is = streamForString("a"); assertEquals('a', EncodingDetector.skipWhitepace(is)); is = streamForString(" b"); assertEquals('b', EncodingDetector.skipWhitepace(is)); is = streamForString("\t\t\tc"); assertEquals('c', EncodingDetector.skipWhitepace(is)); is = streamForString("\n\n\nd"); assertEquals('d', EncodingDetector.skipWhitepace(is)); } /** Creates stream for string. */ private InputStream streamForString(String text) { char[] chars = text.toCharArray(); byte[] bytes = new byte[chars.length]; for (int i = 0; i < chars.length; i++) bytes[i] = (byte)chars[i]; return new ByteArrayInputStream(bytes); } /** Checks the detection. */ private void check(String xml, String encoding, char firstCharInStream) { InputStream is = streamForString(xml); try { EncodingDetector.DetectionResult result = EncodingDetector.detectEncoding(is); assertEquals(encoding, result.getEncoding()); assertEquals(firstCharInStream, (char)result.getStream().read()); } catch (IOException e) { e.printStackTrace(); fail(); } } }