/******************************************************************************* * Copyright (c) 2004, 2006 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package org.eclipse.wst.xml.tests.encoding.xml; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.MalformedInputException; import junit.framework.TestCase; import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail; import org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector; import org.eclipse.wst.xml.tests.encoding.TestsPlugin; /** * FIXME: this might be a good starting point to create a "file peeker"? But, * its not otherwised used -- delete if not fixed/improved soon * XMLMalformedInputTests */ public class XMLMalformedInputTests extends TestCase { private int READ_BUFFER_SIZE = 8000; private boolean DEBUG = false; public XMLMalformedInputTests(String name) { super(name); } /** * Tests for a file, filename that should throw a * MalformedInputExceptionWithDetail at character, expectedPosition. This * happens when no encoding is specified, so the default is used, but * there are characters that the default encoding does not recognize */ void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException { Exception foundException = null; int badCharPosition = -1; File file = TestsPlugin.getTestFile(filename); if (!file.exists()) throw new IllegalArgumentException(filename + " was not found"); InputStream inputStream = new FileInputStream(file); InputStream istream = getMarkSupportedStream(inputStream); detector.set(istream); // IEncodedDocument doc = // detector.createNewStructuredDocument(filename, istream); EncodingMemento encodingMemento = ((XMLResourceEncodingDetector) detector).getEncodingMemento(); String foundIANAEncoding = encodingMemento.getJavaCharsetName(); Charset charset = Charset.forName(foundIANAEncoding); CharsetDecoder charsetDecoder = charset.newDecoder(); charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); istream.close(); inputStream.close(); // now, try reading as per encoding inputStream = new FileInputStream(file); // skip BOM for this case // System.out.println(inputStream.read()); // System.out.println(inputStream.read()); // System.out.println(inputStream.read()); InputStreamReader reader = new InputStreamReader(inputStream, charsetDecoder); try { // just try reading ... should throw exception // exception) readInputStream(reader); } catch (MalformedInputException e) { // as expected, now do detailed checking. inputStream.close(); istream.close(); inputStream = new FileInputStream(file); charsetDecoder = charset.newDecoder(); charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); reader = new InputStreamReader(inputStream, charsetDecoder); istream = getMarkSupportedStream(inputStream); try { handleMalFormedInput_DetailChecking(reader, foundIANAEncoding); } catch (MalformedInputExceptionWithDetail se) { foundException = se; badCharPosition = se.getCharPosition(); } } finally { if (istream != null) { istream.close(); } if (inputStream != null) { inputStream.close(); } } // handle adjustments here for VM differnces: // for now its either 49 or 49 + 2 BOMs (51) // can be smarting later. assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + foundException, foundException instanceof MalformedInputExceptionWithDetail); assertTrue("Wrong character position detected in MalformedInputException. Expected: " + expectedPosition + " Found: " + badCharPosition, (badCharPosition == expectedPosition) || badCharPosition == expectedPosition - 2); } // public void testXSLMalformedInput() throws IOException { // String filename = fileLocation + "MalformedNoEncoding.xsl"; // doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 211); // } /** * Ensures that an InputStream has mark/reset support. */ private static InputStream getMarkSupportedStream(InputStream original) { if (original == null) return null; if (original.markSupported()) return original; return new BufferedInputStream(original); } private StringBuffer readInputStream(Reader reader) throws IOException { StringBuffer buffer = new StringBuffer(); int numRead = 0; char tBuff[] = new char[READ_BUFFER_SIZE]; while ((numRead = reader.read(tBuff, 0, tBuff.length)) != -1) { if (DEBUG) { System.out.println(tBuff[0]); } buffer.append(tBuff, 0, numRead); } return buffer; } /* * removed for PPC machine with IBM VM * https://bugs.eclipse.org/bugs/show_bug.cgi?id=126503 */ // public void testXMLMalformedInput() throws IOException { // String filename = fileLocation + "MalformedNoEncoding.xml"; // doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 51); // } // since above test was only one im this class, put in this no op to avoid // a failure due to no tests in class! public void testNoOp() { assertTrue(true); } private void handleMalFormedInput_DetailChecking(Reader reader, String encodingName) throws IOException, MalformedInputExceptionWithDetail { int charPostion = -1; charPostion = getCharPostionOfFailure(reader); // all of that just to throw more accurate error // note: we do the conversion to ianaName, instead of using the local // variable, // because this is ultimately only for the user error message (that // is, // the error occurred // in context of javaEncodingName no matter what ianaEncodingName is throw new MalformedInputExceptionWithDetail(encodingName, charPostion); } private int getCharPostionOfFailure(Reader reader) throws IOException { int charPosition = 1; int charRead = -1; int result = -1; boolean errorFound = false; do { try { if (reader.ready()) { charRead = reader.read(); } if (DEBUG) { System.out.println(charPosition + ": " + escape((char) charRead, true)); } charPosition++; } catch (MalformedInputException e) { // this is expected, since we're expecting failure, // so no need to do anything. errorFound = true; break; } } while ((charRead != -1 && !errorFound) && reader.ready()); if (errorFound) result = charPosition; else result = -1; return result; } private String escape(char aChar, boolean escapeSpace) { StringBuffer outBuffer = new StringBuffer(); switch (aChar) { case ' ' : if (escapeSpace) outBuffer.append('\\'); outBuffer.append(' '); break; case '\\' : outBuffer.append('\\'); outBuffer.append('\\'); break; case '\t' : outBuffer.append('\\'); outBuffer.append('t'); break; case '\n' : outBuffer.append('\\'); outBuffer.append('n'); break; case '\r' : outBuffer.append('\\'); outBuffer.append('r'); break; case '\f' : outBuffer.append('\\'); outBuffer.append('f'); break; default : if ((aChar < 0x0020) || (aChar > 0x007e)) { outBuffer.append('\\'); outBuffer.append('u'); outBuffer.append(toHex((aChar >> 12) & 0xF)); outBuffer.append(toHex((aChar >> 8) & 0xF)); outBuffer.append(toHex((aChar >> 4) & 0xF)); outBuffer.append(toHex(aChar & 0xF)); } else { if (specialSaveChars.indexOf(aChar) != -1) outBuffer.append('\\'); outBuffer.append(aChar); } } return outBuffer.toString(); } /** * Convert a nibble to a hex character * * @param nibble * the nibble to convert. */ private static char toHex(int nibble) { return hexDigit[(nibble & 0xF)]; } /** A table of hex digits */ private static final char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; private static final String specialSaveChars = "=: \t\r\n\f#!"; }