/*
* Copyright 2003-2010 Tufts University Licensed under the
* Educational Community License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.osedu.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS"
* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package tufts.vue.ds;
import tufts.Util;
import tufts.vue.DEBUG;
import tufts.vue.MetaMap;
import tufts.vue.MetaMap.*;
import java.util.*;
import java.io.*;
import java.net.*;
import javax.xml.xpath.*;
import javax.xml.parsers.*;
import org.w3c.dom.Node;
import org.w3c.dom.*;
import org.xml.sax.*;
// TODO: just forget handling depth (e.g., jira comments) for now -- can tackle later.
// The keep in mind w/respect to how we handle data-set interation, so could
// add this under the hood if we like later.
// As for our data-model, we could literally use the XML DOM, tho that's got
// way more than we need in it and isn't very convenient. I guess we
// just need our own API that nicely abstracts everything, so under the
// hood we could use anything from Jackrabbit to DOM to Multimaps to SQL
// or whatever.
// NEED TO GENERICALLY HANDLE KEY MANAGEMENT, AND DATA-CHANGE DETECTION.
// Big question: do we persist original raw XML streams, or digest
// the data first then persist it? First case is safer for
// ultimate data integrity -- can fix parsing / data coalescing
// bugs or make enhancements more easily. We could persist the
// mashed data, but then we'd just need another format / persist
// schema anyway.
/**
* @version $Revision: 1.16 $ / $Date: 2010-02-03 19:13:16 $ / $Author: mike $
* @author Scott Fraize
*/
public class XMLIngest {
/** Class logger; errout() sends its messages here at debug level. */
private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(XMLIngest.class);
/** When true, scanNode prints a progress dot to System.err for nodes at depth REPORT_THRESH - 1. */
private static final boolean XML_DEBUG = false;
/** When true, the iout/ioutln/eoutln/out/outln helpers actually print; when false they do nothing. */
private static final boolean XML_OUTPUT = false;
// REPLACE WITH A WRAPPER: XMLIngestor / XML-SCHEMA-LOADER (or just split out as XMLIngest methods)
// ALL WE NEED are the track methods, and a public final schema for dumpSchema debug & isXMLKeyFold(),
// which we can probably pull out of generic Schema
public static class XmlSchema extends tufts.vue.ds.Schema
{
    /** Dotted path of the element that starts a new data row; also the prefix stripped from field names. */
    final String itemPath;
    /** Number of leading characters removed from field names under itemPath (path length plus the dot), or 0. */
    final int itemPathLen;
    /** Row currently being filled, or null while the scan is outside any row element. */
    DataRow curRow;

    /** castor persistence only */
    // todo: see if we can get rid of this class entirely and have Schema be a final class
    // so we don't run into all the persistence complications with castor.
    public XmlSchema() {
        this.itemPath = "<unknown>";
        this.itemPathLen = 0;
    }

    /**
     * @param source   resource this schema is loaded from
     * @param itemPath dotted path of the row-start element; may be null or empty
     */
    public XmlSchema(tufts.vue.Resource source, String itemPath)
    {
        super.setResource(source);
        final boolean hasPath = (itemPath != null && itemPath.length() > 0);
        this.itemPath = itemPath;
        this.itemPathLen = hasPath ? itemPath.length() + 1 : 0; // +1 covers the dot after the path
        setXMLKeyFold(itemPath != null && itemPath.startsWith("plist."));
        Log.debug("Constructed XmlSchema " + this);
    }

    /** Prints the item path (when set) ahead of the inherited schema dump. */
    @Override public void dumpSchema(PrintWriter ps) {
        if (itemPath != null)
            ps.println("ItemPath: " + itemPath);
        super.dumpSchema(ps);
    }

    /**
     * Records one name/value pair. Names that start with the item path (and are longer
     * than the stripped prefix) are shortened by itemPathLen characters. The field is
     * created on first sight. The value goes to the open row if there is one, otherwise
     * it is only tracked on the field.
     */
    void trackFieldValuePair(String name, String value) {
        final boolean underItemPath =
            itemPath != null && name.startsWith(itemPath) && name.length() > itemPathLen;
        final String fieldName = underItemPath ? name.substring(itemPathLen) : name;

        Field field = getField(fieldName);
        if (field == null) {
            field = addField(fieldName);
            if (fieldName.length() > mLongestFieldName)
                mLongestFieldName = fieldName.length();
        }

        if (curRow == null)
            field.trackValue(value);
        else
            curRow.addValue(field, value);
    }

    /** Opens a fresh row when the element being entered is the row-start element. */
    void trackNodeOpen(String name) {
        if (!name.equals(getRowStartNode()))
            return;
        curRow = new DataRow(this);
        addRow(curRow);
    }

    /** Ends the current row when the element being left is the row-start element. */
    void trackNodeClose(String name) {
        if (name.equals(getRowStartNode()))
            curRow = null;
    }

    private String getRowStartNode() {
        return itemPath;
    }
}
// static class RssSchema extends XmlSchema {
// @Override
// final String getRowStartNode() {
// //return "item";
// return "rss.channel.item";
// }
// @Override
// final String getKeyNode() {
// //return "rss.channel.item.key";
// //return "item.key";
// return "key";
// }
// }
/**
 * Current nesting depth of the scan. scanNode increments it around child scans;
 * the no-depth iout/ioutln/eoutln overloads use it for indentation and scanNode
 * compares it against REPORT_THRESH. Static, so shared by every caller of this class.
 */
static int depth = 0;
/**
 * Evaluates the fixed XPath expression "/rss/channel/item" against {@code document}
 * three ways (single node, string, node set), reports each result, and scans every
 * node of the node set into {@code schema}. Failures are printed to System.err and
 * otherwise ignored.
 */
static void XPathExtract(XmlSchema schema, Document document)
{
    final String itemExpression = "/rss/channel/item";
    try {
        final XPath evaluator = XPathFactory.newInstance().newXPath();
        errout("Extracting " + itemExpression);

        // The first match, as a node.
        final Node firstMatch = (Node) evaluator.evaluate(itemExpression, document, XPathConstants.NODE);
        errout(" Node: " + firstMatch);

        // The same match, as a string.
        final String matchText = (String) evaluator.evaluate(itemExpression, document, XPathConstants.STRING);
        System.out.println(" String: " + matchText);

        // Every match; these are the nodes that actually get scanned.
        final NodeList matches = (NodeList) evaluator.evaluate(itemExpression, document, XPathConstants.NODESET);
        errout("NodeSet: " + Util.tag(matches) + "; size=" + matches.getLength());

        int index = 0;
        while (index < matches.getLength()) {
            scanNode(schema, matches.item(index), null, null);
            index++;
        }
    } catch (XPathExpressionException e) {
        System.err.println("XPathExpressionException caught...");
        e.printStackTrace();
    } catch (Throwable t) {
        t.printStackTrace();
    }
}
/**
 * Parses {@code input} (non-validating) into a DOM document and scans it from the
 * document element down, recording what it finds in {@code schema}.
 *
 * @param schema  schema to fill; when null a new XmlSchema is built from {@code input}
 *                and {@code itemKey}, otherwise flushData() is called on it first
 * @param input   the XML source to parse
 * @param itemKey dotted path of the row-start element; only used when {@code schema} is null
 * @return the schema that was filled (the argument, or the newly created one)
 * @throws IllegalStateException if no document could be parsed from {@code input}
 */
public static Schema ingestXML(XmlSchema schema, org.xml.sax.InputSource input, String itemKey)
{
    final org.w3c.dom.Document doc = parseXML(input, false);

    if (doc == null) {
        // parseXML() reports the underlying failure itself and returns null. Stop here,
        // before any existing schema data is flushed, with a message that says what
        // went wrong instead of a bare NullPointerException further down.
        throw new IllegalStateException("XML parse failed, nothing to ingest: systemId="
                                        + (input == null ? null : input.getSystemId()));
    }

    //doc.normalizeDocument();

    if (DEBUG.DR) {
        try {
            errout("XML parsed, document built:");
            errout("org.w3c.dom.Document: " + Util.tags(doc));
            final org.w3c.dom.DocumentType type = doc.getDoctype();
            //errout("InputEncoding: " + doc.getInputEncoding()); // AbstractMethodError ?
            //errout("xmlEncoding: " + doc.getXmlEncoding()); // AbstractMethodError
            //errout("xmlVersion: " + doc.getXmlVersion()); // AbstractMethodError
            errout("docType: " + Util.tags(type));
            if (type != null) {
                errout("docType.name: " + Util.tags(type.getName()));
                errout("docType.entities: " + Util.tags(type.getEntities()));
                errout("docType.notations: " + Util.tags(type.getNotations()));
                errout("docType.publicId: " + Util.tags(type.getPublicId()));
                errout("docType.systemId: " + Util.tags(type.getSystemId()));
            }
            errout("impl: " + Util.tags(doc.getImplementation().getClass()));
            errout("docElement: " + Util.tags(doc.getDocumentElement().getClass())); // toString() can dump whole document!
        } catch (Throwable t) {
            // The dump is diagnostic only; never let it abort the ingest.
            Log.error("debug failure", t);
        }
    }

    if (schema == null)
        schema = new XmlSchema(tufts.vue.Resource.instance(input), itemKey);
    else
        schema.flushData();

    // XPathExtract(schema, doc) is an alternative extraction path with a fixed RSS
    // expression; the full recursive scan is the one in use.
    scanNode(schema, doc.getDocumentElement(), null, null);

    if (DEBUG.DR || DEBUG.SCHEMA) schema.dumpSchema(System.err);

    return schema;
}
/** Tells whether a DOM node-type code denotes character data: a text node or a CDATA section. */
private static boolean isText(int type) {
    switch (type) {
    case Node.TEXT_NODE:
    case Node.CDATA_SECTION_NODE:
        return true;
    default:
        return false;
    }
}
/** Tells whether {@code node} is a text node or a CDATA section; {@code node} must not be null. */
private static boolean isText(Node node) {
    final int nodeType = node.getNodeType();
    return isText(nodeType);
}
/** Returns the short display label for the type of {@code n}; {@code n} must not be null. */
private static final String getNodeType(Node n) {
    final int typeCode = n.getNodeType();
    return getNodeType(typeCode);
}
/**
 * Maps a DOM node-type code to a short label for debug output. Codes without a
 * label come back as their decimal value.
 */
private static final String getNodeType(int t) {
    switch (t) {
    case Node.ATTRIBUTE_NODE:     return "attr";
    case Node.CDATA_SECTION_NODE: return "cdata";
    case Node.COMMENT_NODE:       return "comment";
    case Node.DOCUMENT_NODE:      return "document";
    case Node.ELEMENT_NODE:       return "element";
    case Node.ENTITY_NODE:        return "entity";
    case Node.TEXT_NODE:          return "text";
    default:                      return String.valueOf(t);
    }
}
/**
 * Scans a DOM node, taking its type, name and value from the node itself.
 *
 * @param schema     schema receiving the scan results
 * @param n          node to scan; must not be null
 * @param parentPath the fully-qualified parent name, or null at the root
 * @param parentName simple name of the parent node, or null at the root
 */
private static void scanNode(XmlSchema schema, org.w3c.dom.Node n, String parentPath, String parentName) {
    final int type = n.getNodeType();
    final String value = n.getNodeValue();
    final String name = n.getNodeName();
    scanNode(schema, n, type, parentPath, parentName, name, value);
}
/**
 * Core recursive scan of one node (or of a synthesized name/value pair).
 *
 * In order: works out the node's output name and fully-qualified name; tells the
 * schema an element is opening; emits the optional debug report; scans attributes
 * (before or after, per ATTRIBUTES_IMMEDIATE); hands the trimmed, non-empty value to
 * the schema under the full name; writes the optional XML echo; recurses into the
 * children; and tells the schema the element has closed.
 *
 * @param schema     schema receiving names, values and row boundaries
 * @param node       the DOM node, or null for a synthesized entry (scanFoldedChildren
 *                   passes null with an explicit name and value)
 * @param type       DOM node-type code to treat this entry as
 * @param parentPath fully-qualified parent name, or null at the root
 * @param parentName simple name of the parent; used to build an attribute's output name
 * @param nodeName   simple name of this entry
 * @param value      raw value of this entry; may be null
 */
private static void scanNode(final XmlSchema schema,
                             final org.w3c.dom.Node node,
                             final int type,
                             final String parentPath,
                             final String parentName,
                             final String nodeName,
                             final String value)
{
    final boolean isAttribute = (type == Node.ATTRIBUTE_NODE);
    final boolean isMergedText = FOLD_TEXT && isText(type);
    final boolean hasAttributes = (!isAttribute && node != null && node.hasAttributes());

    Node firstChild = null, lastChild = null;
    if (node != null) {
        firstChild = node.getFirstChild();
        lastChild = node.getLastChild();
    }

    // Name used in the XML echo.
    final String XMLName;
    if (isAttribute)
        XMLName = parentName + ATTR_SEPARATOR + nodeName;
    else
        XMLName = nodeName;

    // Name used for the schema: folded text reports under its parent's path.
    final String fullName;
    if (parentPath != null) { // should only be null first time in at the top root
        if (isMergedText)
            fullName = parentPath;
        else if (isAttribute)
            fullName = parentPath + ATTR_SEPARATOR + nodeName;
        else
            fullName = parentPath + '.' + nodeName;
    } else {
        fullName = nodeName;
    }

    if (type == Node.ELEMENT_NODE)
        schema.trackNodeOpen(fullName);

    if (depth < REPORT_THRESH) {
        if (depth < REPORT_THRESH - 1) {
            // eoutln() prints nothing unless XML_OUTPUT is set, so don't pay for
            // String.format() and node.toString() on every shallow node otherwise.
            if (XML_OUTPUT) {
                if (type == Node.TEXT_NODE)
                    eoutln(String.format("node(%s) {%s} (len=%d)", getNodeType(type), fullName, value.length()));
                else
                    eoutln(String.format("NODE(%s) {%s} %.192s", getNodeType(type), fullName, node));
            }
        }
        else if (XML_DEBUG)
            System.err.print(".");
    }

    if (hasAttributes && ATTRIBUTES_IMMEDIATE)
        scanAttributes(schema, fullName, nodeName, node.getAttributes());

    // Only non-blank values are tracked; a blank value is treated as no value.
    String outputValue = null;
    if (value != null) {
        outputValue = value.trim();
        if (outputValue.length() > 0) {
            schema.trackFieldValuePair(fullName, outputValue);
        } else
            outputValue = null;
    }

    final NodeList children = node == null ? null : node.getChildNodes();

    // Decide whether this entry gets its own tag in the XML echo: folded text never
    // does, nor does a value-less node that is empty or holds only blank text.
    final boolean DO_TAG;
    if (isMergedText) {
        DO_TAG = false;
    }
    else if (outputValue == null && node != null) {
        if (!node.hasChildNodes()) {
            DO_TAG = false;
        }
        else if (children.getLength() == 1 && isText(firstChild) && firstChild.getNodeValue().trim().length() == 0) {
            DO_TAG = false;
        }
        else
            DO_TAG = true;
    }
    else
        DO_TAG = true;

    boolean closeOnSameLine = false;

    if (DO_TAG) {
        iout("<");
        out(XMLName);
        out(">");

        if (firstChild == null || (isText(firstChild) && firstChild == lastChild)) {
            // No children, or a single text child: keep open tag, text and close tag on one line.
            closeOnSameLine = true;
        } else if (XML_OUTPUT)
            System.out.print('\n');

        if (FOLD_TEXT && (type != Node.ELEMENT_NODE && type != Node.ATTRIBUTE_NODE)) {
            final String err = "UNHANDLED TYPE=" + type + "; " + nodeName;
            outln("<" + err + ">");
            errout(err);
        }
    }

    if (outputValue != null) {
        if (type == Node.CDATA_SECTION_NODE) {
            out("<![CDATA[");
            out(outputValue);
            out("]]>");
        } else {
            out(XMLEntityEncode(outputValue));
        }
    }

    if (!isAttribute && node != null) {
        // god knows why, but attributes have themselves as children? (or is that
        // the #text entry?) Anyway, if we allow this for an attribute dump, the
        // value of the attribute will literally appear twice in the output,
        // back-to-back as one string.
        depth++;
        try {
            if (FOLD_KEYS || schema.isXMLKeyFold()) {
                scanFoldedChildren(schema, children, fullName, nodeName);
            } else {
                for (int i = 0; i < children.getLength(); i++)
                    scanNode(schema, children.item(i), fullName, nodeName);
            }
        } finally {
            // depth is static: keep it balanced even if a child scan throws, or every
            // later scan would start at the wrong depth.
            depth--;
        }
    }

    if (DO_TAG) {
        if (closeOnSameLine)
            outln("</" + XMLName + ">");
        else
            ioutln("</" + XMLName + ">");
    }

    if (type == Node.ELEMENT_NODE)
        schema.trackNodeClose(fullName);

    if (hasAttributes && ! ATTRIBUTES_IMMEDIATE)
        scanAttributes(schema, fullName, nodeName, node.getAttributes());
}
/**
 * Scans each attribute in {@code attr} as a child entry of the node named by
 * {@code fullName} / {@code nodeName}. A null or empty map is ignored.
 */
private static void scanAttributes(XmlSchema schema, String fullName, String nodeName, NamedNodeMap attr) {
    if (attr == null)
        return;
    int index = 0;
    while (index < attr.getLength()) {
        final Node attribute = attr.item(index);
        scanNode(schema, attribute, fullName, nodeName);
        index++;
    }
}
/**
 * Scans {@code children}, folding Apple plist style pairs
 * ({@code <key>UserKey</key><string>UserValue</string>}) into single entries: the
 * key's text becomes the entry name (spaces replaced by underscores) and the text of
 * the following element becomes its value. {@code <true/>} and {@code <false/>}
 * yield the values "true" and "false". Test case was iTunes Music Library.xml.
 *
 * A key followed by a {@code dict} or {@code array} is not folded: the key is skipped
 * and the container is scanned as an ordinary child. Children that are not keys are
 * scanned normally. An empty key or an empty value element yields a null name or
 * value rather than failing.
 *
 * @param schema   schema receiving the scan results
 * @param children child list of the node being scanned
 * @param fullName fully-qualified name of the parent node
 * @param nodeName simple name of the parent node
 */
private static void scanFoldedChildren(XmlSchema schema, final NodeList children, final String fullName, final String nodeName)
{
    for (int i = 0; i < children.getLength(); i++) {
        final Node item = children.item(i);
        final Node next = children.item(i+1); // null when item is the last child

        if (next != null && "key".equals(item.getNodeName())) {
            final String nextName = next.getNodeName();

            if ("dict".equals(nextName) || "array".equals(nextName)) {
                // Containers aren't pulled up yet; leave them for the next iteration.
                continue;
            }

            // The text lives one level down, in the element's first child.
            String newNodeName = firstChildValue(item);
            if (newNodeName != null)
                newNodeName = newNodeName.replace(' ', '_');

            final String newNodeValue;
            if ("true".equals(nextName)) {
                newNodeValue = "true"; // a simple self-terminating "<true/>" with no children
            }
            else if ("false".equals(nextName)) {
                // almost never see this in iTunes Music Library.xml
                newNodeValue = "false";
            }
            else {
                // e.g. <string></string> has no child at all: value is then null
                newNodeValue = firstChildValue(next);
            }

            // current node's text is the new node name, next node's text the new node value
            scanNode(schema, null, Node.ELEMENT_NODE, fullName, nodeName, newNodeName, newNodeValue);
            i++; // the value element has been consumed along with its key
            continue;
        }

        scanNode(schema, item, fullName, nodeName);
    }
}

/** Returns the node value of {@code node}'s first child, or null when it has no children. */
private static String firstChildValue(Node node) {
    final Node child = node.getFirstChild();
    return child == null ? null : child.getNodeValue();
}
/*
public static void dumpElement(Element e) {
out("\tElement: " + Util.tags(e));
out("\tElement tag: " + e.getTagName());
out("\tElement SchemaTypeInfo: " + Util.tags(e.getSchemaTypeInfo()));
}
*/
/**
 * Parses XML into a DOM document, ignoring comments.
 *
 * NOTE(review): element-content whitespace is requested to be ignored, but the JAXP
 * documentation ties that setting to validating mode -- confirm it has any effect
 * when {@code validating} is false.
 * NOTE(review): the factory otherwise runs with default DTD / external-entity
 * settings; if input can come from untrusted sources, consider restricting them.
 *
 * @param input      a file name (String), an InputSource, or an InputStream
 * @param validating if true, the contents is validated against the DTD specified in the file
 * @return the parsed document, or null if anything at all went wrong (including an
 *         unsupported input type); the failure's stack trace is printed
 */
private static org.w3c.dom.Document parseXML(Object input, boolean validating) {
    try {
        final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        builderFactory.setIgnoringElementContentWhitespace(true);
        builderFactory.setIgnoringComments(true);
        //builderFactory.setCoalescing(true);
        builderFactory.setValidating(validating);

        final boolean supported =
            input instanceof String || input instanceof InputSource || input instanceof InputStream;
        if (!supported)
            throw new Error("Unhandled input type: " + Util.tags(input));

        final DocumentBuilder builder = builderFactory.newDocumentBuilder();

        if (input instanceof String)
            return builder.parse(new File((String) input));
        if (input instanceof InputSource)
            return builder.parse((InputSource) input);
        // TODO: get the encoding from the url stream (e.g. "ISO-8859-1") and parse via an InputSource
        return builder.parse((InputStream) input);

    } catch (Throwable t) {
        t.printStackTrace();
        return null;
    }
}
public static String XMLEntityEncode(final String text)
{
// todo: if the result of this is simply destined for a writer, would
// be more efficient to pass the writer in, and skip constructing new
// StringBuffers. Apache Commons has methods for this, presumably
// for this reason -- eventually go ahead and use that:
// will NOT introduce " uneeded for us, possibly problematic, in
// that this text may ultimately be handled by an HTML component which
// won't handle """ (todo: test w/JLabel <html>)
//return org.apache.commons.lang.StringEscapeUtils.escapeHtml(s);
// will introduce "
//return org.apache.commons.lang.StringEscapeUtils.escapeXml(s);
StringBuilder buf = null;
final int len = (text == null ? -1 : text.length());
for ( int i = 0; i < len; i++ )
{
final char c = text.charAt(i);
String entity = null;
switch (c) {
// These are the five basic XML entities:
// See http://commons.apache.org/lang/api/org/apache/commons/lang/StringEscapeUtils.html
case '&': entity = "&"; break;
case '<': entity = "<"; break;
case '>': entity = ">"; break;
case '"': entity = """; break;
//case '\'': entity = "'"; break; // not a legal HTML entity, even tho is a legal XML entity
//case '\r': entity = "
"; break; // test
default:
if (buf != null)
buf.append(c);
continue;
}
// We've encountered something to encode: entity has been set:
if (buf == null) {
buf = new StringBuilder(len + 12);
buf.append(text, 0, i);
}
buf.append(entity);
}
return buf == null ? text : buf.toString();
// for ( int i = 0; i < len; i++ ) {
// final char c = s.charAt( i );
// if (c >= 'a' && c <= 'z' || c >='A' && c <= 'Z' || c >= '0' && c <= '9') {
// buf.append( c );
// } else {
// final String entity;
// switch (c) {
// case '&': entity = "&"; break;
// case '<': entity = "<"; break;
// case '>': entity = ">"; break;
// case '"': entity = """; break;
// //case '\'': entity = "'"; break; // apparently, not actually a legal entity
// //case '\r': entity = "
"; break;
// default: entity = null;
// }
// if (entity != null)
// buf.append(entity);
// else
// //buf.append( "" + (int)c + ";" );
// buf.append(c);
// }
// }
// return buf.toString();
}
/** Writes {@code s} to standard output, indented to the current scan depth, with no newline. */
public static void iout(String s) {
    final int indent = depth;
    iout(indent, s);
}
/** Writes {@code s} to standard output as its own line, indented to the current scan depth. */
public static void ioutln(String s) {
    final int indent = depth;
    ioutln(indent, s);
}
/** One level of indentation; the indenting output helpers print it once per depth level. */
final static String TAB = " ";
/**
 * Writes {@code _depth} indentation units followed by {@code s} to standard output,
 * with no newline. Does nothing unless XML_OUTPUT is set.
 */
public static void iout(int _depth, String s) {
    if (!XML_OUTPUT)
        return;
    int remaining = _depth;
    while (remaining-- > 0)
        System.out.print(TAB);
    System.out.print(s);
}
/**
 * Writes {@code _depth} indentation units followed by {@code s} and a line break to
 * standard output. Does nothing unless XML_OUTPUT is set.
 */
public static void ioutln(int _depth, String s) {
    if (!XML_OUTPUT)
        return;
    int remaining = _depth;
    while (remaining-- > 0)
        System.out.print(TAB);
    System.out.println(s);
}
/**
 * Writes {@code _depth} indentation units followed by {@code s} and a line break to
 * standard error. Does nothing unless XML_OUTPUT is set.
 */
public static void eoutln(int _depth, String s) {
    if (!XML_OUTPUT)
        return;
    int remaining = _depth;
    while (remaining-- > 0)
        System.err.print(TAB);
    System.err.println(s);
}
/** Writes {@code s} to standard error as its own line, indented to the current scan depth. */
public static void eoutln(String s) {
    final int indent = depth;
    eoutln(indent, s);
}
/** Writes {@code s} (or "null") to standard output with no newline, when XML_OUTPUT is set. */
public static void out(String s) {
    if (!XML_OUTPUT)
        return;
    System.out.print(String.valueOf(s));
}
/** Writes {@code s} (or "null") and a line break to standard output, when XML_OUTPUT is set. */
public static void outln(String s) {
    if (!XML_OUTPUT)
        return;
    System.out.println(String.valueOf(s));
}
/** Sends {@code s} (or "null") to the class logger at debug level. */
public static void errout(String s) {
    final String message = String.valueOf(s);
    Log.debug(message);
}
/** When true, scanNode scans a node's attributes before its value and children; when false, after the node is closed. */
final static boolean ATTRIBUTES_IMMEDIATE = false; // false better for clearer XML output, true better for schema output (e.g., rss.version 1st, not last)
/** When true, text and CDATA nodes report their value under the parent's full name and get no tag of their own. */
final static boolean FOLD_TEXT = true; // default true: fold Node.TEXT_NODE(#text) and CDATA items into parent node
/** When true, scanNode always scans children through scanFoldedChildren, whatever the schema's own key-fold setting. */
final static boolean FOLD_KEYS = false; // auto-enabled if top-level item is "plist" (currently breaks on JIRA XML if true)
//final static int REPORT_THRESH = FOLD_KEYS ? 4 : 3;
/** Depth limit for scanNode's debug reporting: full reports below REPORT_THRESH - 1, a progress dot (with XML_DEBUG) at REPORT_THRESH - 1, nothing deeper. */
final static int REPORT_THRESH = 4;
//final static int REPORT_THRESH = 1;
/** Placed between an owning node's path or name and an attribute name when building attribute names. */
final static char ATTR_SEPARATOR = '@';
/** RSS view of a JIRA issue navigator; the URL getTestXMLStream() connects to. */
private static final String JIRA_VUE_URL = "http://bugs.atech.tufts.edu/secure/IssueNavigator.jspa?view=rss&pid=10001&tempMax=9999&reset=true&decorator=none";
/**
 * Value sent as the "Cookie" request header by getTestXMLStream().
 * NOTE(review): this looks like a personal session cookie committed to source --
 * confirm it is dead, and consider supplying it from outside the code instead.
 */
private static final String JIRA_SFRAIZE_COOKIE = "seraph.os.cookie=LkPlQkOlJlHkHiEpGiOiGjJjFi";
/**
 * Opens the test JIRA RSS URL with the test cookie set, logs the response headers,
 * and returns the response body stream. The caller owns (and must close) the stream.
 *
 * Encoding background (SMF 2008-10-02): some feeds, e.g. Craigslist, are ISO-8859-1 and
 * say so only in the Content-Type header ("application/rss+xml; charset=ISO-8859-1").
 * Handing the raw stream to the parser then failed with a SAXParseException
 * ("Unconvertible UTF-8 character beginning with 0x95"), while handing it a default
 * InputStreamReader worked. A ROME XmlReader(URLConnection) could also be used, as it
 * works out the encoding itself. For more on the complexity of this issue, see:
 * http://diveintomark.org/archives/2004/02/13/xml-media-types
 *
 * @throws IOException if the connection cannot be opened or read
 */
private static InputStream getTestXMLStream()
    throws IOException
{
    final URL jiraUrl = new URL(JIRA_VUE_URL);
    final URLConnection connection = jiraUrl.openConnection();
    connection.setRequestProperty("Cookie", JIRA_SFRAIZE_COOKIE);

    errout("Opening connection to " + jiraUrl);
    connection.connect();

    errout("Getting InputStream...");
    final InputStream body = connection.getInputStream();
    errout("Got " + Util.tags(body));

    errout("Getting headers...");
    final Map<String,List<String>> headerFields = connection.getHeaderFields();

    errout("HEADERS:");
    final Iterator<Map.Entry<String,List<String>>> it = headerFields.entrySet().iterator();
    while (it.hasNext()) {
        final Map.Entry<String,List<String>> header = it.next();
        errout(header.getKey() + ": " + header.getValue());
    }

    return body;
}
/**
 * Command-line test driver: turns on debug output, ingests the given XML file with a
 * fresh schema, and lets ingestXML dump the resulting schema.
 *
 * @param args {@code args[0]} is the XML file to read, {@code args[1]} the dotted
 *             item path that marks the start of a row
 * @throws IOException if the file cannot be opened or closed
 */
public static void main(String[] args)
    throws IOException
{
    DEBUG.Enabled = DEBUG.DR = DEBUG.IO = DEBUG.SCHEMA = true;

    tufts.vue.VUE.parseArgs(args);

    org.apache.log4j.Logger.getRootLogger().removeAllAppenders(); // need to do this or we get everything twice
    org.apache.log4j.Logger.getRootLogger().addAppender
        (new org.apache.log4j.ConsoleAppender(tufts.vue.VUE.MasterLogPattern, "System.err"));

    errout("Max mem: " + Util.abbrevBytes(Runtime.getRuntime().maxMemory()));

    if (args.length < 2) {
        // Say what is expected instead of dying with ArrayIndexOutOfBoundsException.
        System.err.println("Usage: XMLIngest <xml-file> <item-path>");
        return;
    }

    final String file = args[0];
    final String key = args[1];

    Log.debug("File: " + file);
    Log.debug("Key: " + key);

    final FileReader reader = new FileReader(file);
    try {
        final InputSource is = new InputSource(file);
        is.setCharacterStream(reader);
        ingestXML(null, is, key);
    } finally {
        reader.close(); // don't leak the file handle, even when the ingest fails
    }

    System.err.println("\n");
    Log.debug("done");
}
// public static void main(String[] args)
// throws IOException
// {
// //final XmlSchema schema = new RssSchema();
// errout("Max mem: " + Util.abbrevBytes(Runtime.getRuntime().maxMemory()));
// //getXMLStream();System.exit(0);
// Document doc;
// String src;
// if (args.length < 1) {
// doc = parseXML(getTestXMLStream(), false);
// src = JIRA_VUE_URL;
// } else {
// doc = parseXML(args[0], false);
// src = args[0];
// }
// //doc.normalizeDocument();
// errout("GOT DOC " + Util.tag(doc) + " " + doc);
// errout("InputEncoding: " + doc.getInputEncoding());
// errout("xmlEncoding: " + doc.getXmlEncoding());
// errout("xmlVersion: " + doc.getXmlVersion());
// errout("docType: " + Util.tags(doc.getDoctype()));
// errout("impl: " + Util.tags(doc.getImplementation()));
// errout("docElement: " + Util.tags(doc.getDocumentElement()));
// //out("element: " + Util.tags(doc.getDocumentElement()));
// outln("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
// outln("<!-- created by RSSTest " + new Date() + " from " + src + " -->");
// final XmlSchema schema = new XmlSchema(Util.tag(doc), "rss.channel.item");
// if (true)
// XPathExtract(schema, doc);
// else
// scanNode(schema, doc.getDocumentElement(), null, null);
// schema.dumpSchema(System.err);
// }
}