package com.cognitionis.nlp_files;
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
import java.io.*;
import javax.xml.parsers.*;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.*;
import org.xml.sax.*;
import com.cognitionis.utils_basickit.Xml2PlainHandler;
/**
*
* @author Héctor Llorens
* @since 2011
*/
public class XMLFile extends NLPFile {
/*
* XML description files
*/
//private String dtd; DEPRECATED use XSD
private String xsd;
public XMLFile(String filename, String xsd_path) {
super(filename); // dtd = null; DEPRECATED use XSD
xsd = xsd_path;
}
public Boolean isWellFormatted() {
try {
if (super.getFile()==null) {
throw new Exception("No file loaded in NLPFile object");
}
if (xsd!=null) {
// File dtd_file = new File(FileUtils.getApplicationPath()+FileUtils.NLPFiles_descr_path + extension + ".dtd");
// if (dtd_file.exists() && dtd_file.isFile()) {
// this.dtd = dtd_file.getCanonicalPath();
// }
File xsd_file = new File(xsd);
if (xsd_file.exists() && xsd_file.isFile()) {
this.xsd = xsd_file.getCanonicalPath();
}
}
if (!SAXParserCheck(this.f)) {
System.err.println("Malformed XML (SAX)");
return false;
}
/*
DOM parser: Deprecated
SAX is faster and for the moment does the JOB
if (!DOMParserCheck(this.f)) {
System.err.println("Malformed XML (DOM)");
return false;
} else {
System.err.println("Correct DOM");
}*/
} catch (Exception e) {
System.err.println("Errors found (" + this.getClass().getSimpleName() + "):\n\t" + e.toString() + "\n");
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
return false;
}
return true;
}
public Boolean SAXParserCheck(File f) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
// For DTD (only internal...) DEPRECATED use XSD
//factory.setValidating(false);
//factory.setNamespaceAware(true);
// For XSD
factory.setValidating(false);
factory.setNamespaceAware(true);
if (xsd != null) {
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
System.err.println("Validating "+f.getName()+" with xsd (" + xsd + ")");
}
SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
factory.setSchema(schemaFactory.newSchema(new Source[]{new StreamSource(xsd)}));
}
SAXParser parser = factory.newSAXParser();
XMLReader reader = parser.getXMLReader();
reader.setErrorHandler(new SimpleErrorHandler());
reader.parse(f.getCanonicalPath());
return true;
} catch (Exception e) {
System.err.println("Errors found (XMLFile):\n\t" + e.toString() + "\n");
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
return false;
}
}
public Boolean DOMParserCheck(File f) {
try {
// Create a Xerces DOM Parser
DOMParser parser = new DOMParser();
// Turn Validation on
parser.setFeature("http://xml.org/sax/features/validation", true);
parser.setFeature("http://apache.org/xml/features/validation/schema", true);
parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", true);
// Register Error Handler
parser.setErrorHandler(new SimpleErrorHandler());
// Parse the Document and traverse the DOM
parser.parse(f.getCanonicalPath());
// Document document = parser.getDocument();
// traverse (document);
//DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// For XSD
//factory.setValidating(false);
//factory.setNamespaceAware(true);
/*if (xsd != null) {
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
System.err.println("Validating "+f.getName()+" with xsd (" + xsd + ")");
}
SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
factory.setSchema(schemaFactory.newSchema(new Source[]{new StreamSource(xsd)}));
}*/
//DocumentBuilder builder = factory.newDocumentBuilder();
//builder.setErrorHandler(new SimpleErrorHandler());
//Document document = builder.parse(new InputSource(f.getCanonicalPath()));
return true;
} catch (Exception e) {
System.err.println("Errors found (XMLFile):\n\t" + e.toString() + "\n");
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
return false;
}
}
public String toPlain(){
return toPlain(this.getFile().toString() + ".plain");
}
@Override
public String toPlain(String filename) {
try {
Xml2PlainHandler xml2plain = new Xml2PlainHandler();
//xml2plain.getText(this.f.getCanonicalPath());
xml2plain.saveFile(this.f.getCanonicalPath(), filename);
return filename;
} catch (Exception e) {
System.err.println("Errors found (" + this.getClass().getSimpleName() + "):\n\t" + e.toString() + "\n");
if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
return null;
}
}
private class SimpleErrorHandler implements ErrorHandler {
public void warning(SAXParseException e) throws SAXException {
System.err.println("Warning:" +e.getMessage());
if (System.getProperty("DEBUG")!=null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
}
public void error(SAXParseException e) throws SAXException {
//System.err.println(e.getMessage());
if (System.getProperty("DEBUG")!=null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
throw e;
}
public void fatalError(SAXParseException e) throws SAXException {
//System.err.println(e.getMessage());
if (System.getProperty("DEBUG")!=null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
e.printStackTrace(System.err);
}
throw e;
}
}
}