/* * Copyright 2007-2008 Amazon Technologies, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://aws.amazon.com/apache2.0 * * This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES * OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and * limitations under the License. */ package com.amazonaws.mturk.addon; //validating an XML document with an XSD schema import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.StringReader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import javax.xml.XMLConstants; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.xml.sax.Attributes; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; import com.amazonaws.mturk.service.exception.ValidationException; /** * Validate a Turk QAP file using the Turk XSD schema Requires a copy of the * Turk schema "QuestionForm.xsd" in the current directory available from: * http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionForm.xsd */ public class QAPValidator { public final static String QUESTION_FORM_XSD = "QuestionForm.xsd"; public final static String FORMATTED_CONTENT_XSD = "FormattedContentXHTMLSubset.xsd"; public final static String EXTERNAL_QUESTION_XSD = "ExternalQuestion.xsd"; public static void validate(String question) throws ValidationException, IOException { validate(question, false, QUESTION_FORM_XSD, false); } public static void validateFile(String fileName) throws ValidationException, IOException { validate(fileName, true, QUESTION_FORM_XSD, false); } protected static void validate(String fileOrString, boolean isFile, String schema, boolean skipFormattedContent) throws ValidationException, IOException { FileReader fReader = null; StringReader sReader = null; try { // Get a parser capable of parsing vanilla XML into a DOM tree DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder parser = factory.newDocumentBuilder(); // parse the JNLP file on the command line purely as XML and get a DOM // tree representation. Document document = null; if (isFile) { fReader = new FileReader(fileOrString); document = parser.parse(new File(fileOrString)); } else { sReader = new StringReader(fileOrString); document = parser.parse(new InputSource(new StringReader(fileOrString))); } // Determine if root node is QuestionForm or ExternalQuestion, // then validate using the appropriate schema. For // QuestionForm, also find and validate FormattedContent // elements. Element docElement = document.getDocumentElement(); String docElemName = docElement.getTagName(); // build an XSD-aware SchemaFactory SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); System.setProperty("javax.xml.transform.TransformerFactory", "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl"); // hook up mindless org.xml.sax.ErrorHandler implementation. schemaFactory.setErrorHandler(new XSDErrorHandler()); if (docElemName == "QuestionForm") { schema = QUESTION_FORM_XSD; } else if (docElemName == "ExternalQuestion") { schema = EXTERNAL_QUESTION_XSD; skipFormattedContent = true; } else { throw new SAXException("Root element is not valid Question " + "data (QuestionForm, ExternalQuestion)"); } // get the custom xsd schema describing the required format for my XML files. Schema schemaXSD = schemaFactory.newSchema(QAPValidator.class.getResource(schema)); // The line above doesn't work when run in the Eclipse debugger. // Use an explicit file URL as show below if running in eclipse debugger //Schema schemaXSD = schemaFactory.newSchema( new URL("file:/...path to SDK.../etc/schema/2005-10-01/QuestionForm.xsd") ); // Create a Validator capable of validating JNLP files according to to the // Vampqh custom schema. Validator validator = schemaXSD.newValidator(); // parse the JNLP tree againts the stricter XSD schema validator.validate(new DOMSource(document)); if (!skipFormattedContent) { String xmlString = null; if (isFile) { // Read the raw XML into a string File file = new File(fileOrString); char[] buffer = new char[ (int) file.length() + 1 ]; BufferedReader reader = new BufferedReader( new FileReader( file ) ); try { reader.read( buffer ); } finally { reader.close(); } xmlString = new String( buffer ); } else { xmlString = fileOrString; } XhtmlValidator.validateAndClean( xmlString ); } closeStreams(isFile, fReader, sReader); } catch (SAXParseException e) { closeStreams(isFile, fReader, sReader); throw new ValidationException("[" + e.getLineNumber() + "," + e.getColumnNumber() + "] " + e.getMessage(), null); } catch (Exception e) { closeStreams(isFile, fReader, sReader); throw new ValidationException(e.getMessage(), e); } } private static void closeStreams(boolean isFile, FileReader fReader, StringReader sReader) throws IOException { if (isFile && fReader != null) fReader.close(); else if (sReader != null) sReader.close(); } } // end ValidateSchema class XSDHandler extends DefaultHandler { private final String FORMATTED_CONTENT_TOKEN = "FormattedContent"; boolean atFormattedContent = false; public void startDocument() throws SAXException { // System.out.println( "SAX Event: START DOCUMENT" ); } public void endDocument() throws SAXException { // System.out.println( "SAX Event: END DOCUMENT" ); } public void startElement(String namespaceURI, String localName, String qName, Attributes attr) throws SAXException { if (localName.equalsIgnoreCase(FORMATTED_CONTENT_TOKEN)) { atFormattedContent = true; } } public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (localName.equalsIgnoreCase(FORMATTED_CONTENT_TOKEN)) atFormattedContent = false; } public void characters(char[] ch, int start, int length) throws SAXException { String thisString = new String(ch, start, length); if (atFormattedContent) { try { if (!thisString.trim().equals("")) { String xhtmlPre = "<?xml version=\"1.0\"?><FormattedContent xmlns=\"http://www.w3.org/1999/xhtml\">"; String xhtmlPost = "</FormattedContent>"; QAPValidator.validate(xhtmlPre + thisString + xhtmlPost, false, QAPValidator.FORMATTED_CONTENT_XSD, false); } } catch (Exception e) { throw new SAXException(e.getMessage(), e); } } } } class XSDErrorHandler implements ErrorHandler { /** * default contstructor */ public XSDErrorHandler() { } /** * Receive notification of a warning. * * <p> * SAX parsers will use this method to report conditions that are not errors * or fatal errors as defined by the XML recommendation. The default behaviour * is to take no action. * </p> * * <p> * The SAX parser must continue to provide normal parsing events after * invoking this method: it should still be possible for the application to * process the document through to the end. * </p> * * <p> * Filters may use this method to report other, non-XML warnings as well. * </p> * * @param exception * The warning information encapsulated in a SAX parse exception. * @exception org.xml.sax.SAXException * Any SAX exception, possibly wrapping another exception. * @see org.xml.sax.SAXParseException */ public void warning(SAXParseException exception) throws SAXException { System.err.println("[WARNING] " + exception.getMessage()); } /** * Receive notification of a recoverable error. * * <p> * This corresponds to the definition of "error" in section 1.2 of the W3C XML * 1.0 Recommendation. For example, a validating parser would use this * callback to report the violation of a validity constraint. The default * behaviour is to take no action. * </p> * * <p> * The SAX parser must continue to provide normal parsing events after * invoking this method: it should still be possible for the application to * process the document through to the end. If the application cannot do so, * then the parser should report a fatal error even if the XML recommendation * does not require it to do so. * </p> * * <p> * Filters may use this method to report other, non-XML errors as well. * </p> * * @param exception * The error information encapsulated in a SAX parse exception. * @exception org.xml.sax.SAXException * Any SAX exception, possibly wrapping another exception. * @see org.xml.sax.SAXParseException */ public void error(SAXParseException exception) throws SAXException { System.err.println("[ERROR] " + exception.getMessage()); } /** * Receive notification of a non-recoverable error. * * <p> * <strong>There is an apparent contradiction between the documentation for * this method and the documentation for {@link * org.xml.sax.ContentHandler#endDocument}. Until this ambiguity is resolved * in a future major release, clients should make no assumptions about whether * endDocument() will or will not be invoked when the parser has reported a * fatalError() or thrown an exception.</strong> * </p> * * <p> * This corresponds to the definition of "fatal error" in section 1.2 of the * W3C XML 1.0 Recommendation. For example, a parser would use this callback * to report the violation of a well-formedness constraint. * </p> * * <p> * The application must assume that the document is unusable after the parser * has invoked this method, and should continue (if at all) only for the sake * of collecting additional error messages: in fact, SAX parsers are free to * stop reporting any other events once this method has been invoked. * </p> * * @param exception * The error information encapsulated in a SAX parse exception. * @exception org.xml.sax.SAXException * Any SAX exception, possibly wrapping another exception. * @see org.xml.sax.SAXParseException */ public void fatalError(SAXParseException exception) throws SAXException { System.err.println("[FATAL ERROR] " + exception.getMessage()); } } // end JNLPErrorHandler