/**
 * $Id: xpwd.java 21 2008-07-04 08:33:47Z daldei $
 * $Date: 2008-07-04 04:33:47 -0400 (Fri, 04 Jul 2008) $
 *
 */

package org.xmlsh.commands.internal;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.List;

import javanet.staxutils.OutputFactory;

import javax.xml.crypto.dsig.TransformException;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.stream.StreamSource;

import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.WhitespaceStrippingPolicy;
import net.sf.saxon.s9api.XdmNode;

import org.json.JSONObject;
import org.xmlsh.core.CoreException;
import org.xmlsh.core.InputPort;
import org.xmlsh.core.InvalidArgumentException;
import org.xmlsh.core.Options;
import org.xmlsh.core.OutputPort;
import org.xmlsh.core.UnexpectedException;
import org.xmlsh.core.XCommand;
import org.xmlsh.core.XValue;
import org.xmlsh.sh.shell.SerializeOpts;
import org.xmlsh.sh.shell.Shell;
import org.xmlsh.util.Util;

/*
 * Convert an XML document whose elements are in the JXML namespace
 * (http://www.xmlsh.org/jxml) to JSON text.
 *
 * Recognized elements: xjson, file, object, member, array, string, number,
 * boolean, null. Any other element is skipped.
 *
 * Arguments
 *
 * -p        Pretty print: start each object, member and array on a new line,
 *           indented by the current nesting level
 * [input]   Optional input; standard input is read when no argument remains
 *
 * The serialization options defined by SerializeOpts are accepted as well.
 */
public class xml2json extends XCommand {

	private boolean bIndent = false ;
	private int mLevel = 0;                 // indentation level
	private SerializeOpts mSerializeOpts;   // options cloned in run() before the UTF-8 output override

	private static final String kENCODING_UTF_8 = "UTF-8";
	private static final String kJXML_URI = "http://www.xmlsh.org/jxml";

	private static final QName kATTR_ENCODING = new QName("encoding");
	private static final QName kATTR_NAME     = new QName("name");
	private static final QName kATTR_VALUE    = new QName("value");
	private static final QName kATTR_SRC      = new QName("src");
	private static final QName kATTR_UNWRAP   = new QName("unwrap");
	private static final QName kATTR_HTML     = new QName( "html" );               // A String formatted as XHTML

	private static final QName kELEM_XJSON    = new QName(kJXML_URI, "xjson" );
	private static final QName kELEM_FILE     = new QName(kJXML_URI, "file" );
	private static final QName kELEM_OBJECT   = new QName(kJXML_URI, "object" );   // A JSON Object
	private static final QName kELEM_MEMBER   = new QName(kJXML_URI, "member" );   // A JSON Object Member
	private static final QName kELEM_STRING   = new QName(kJXML_URI, "string" );   // A JSON STRING
	private static final QName kELEM_NUMBER   = new QName(kJXML_URI, "number" );   // A JSON NUMBER
	private static final QName kELEM_ARRAY    = new QName(kJXML_URI, "array" );    // A JSON ARRAY
	private static final QName kELEM_BOOLEAN  = new QName(kJXML_URI, "boolean" );  // A JSON Literal (true,false)
	private static final QName kELEM_NULL     = new QName(kJXML_URI, "null" );     // A JSON Literal null

	/*
	 * Command entry point.
	 * Parses the options, reads the input as XML events and writes the JSON
	 * text to standard output. Always returns 0; failures are reported by exception.
	 */
	public int run( List<XValue> args ) throws Exception
	{
		Options opts = new Options("p=print",SerializeOpts.getOptionDefs());
		opts.parse(args);

		bIndent = opts.hasOpt("p");
		args = opts.getRemainingArgs();

		OutputPort stdout = getStdout();
		InputPort inp = args.isEmpty() ? getStdin() : getInput( args.get(0) );

		SerializeOpts serializeOpts = getSerializeOpts(opts);
		XMLEventReader reader = inp.asXMLEventReader(serializeOpts);

		// Keep a copy of the options as parsed (used by writeFile and readFile),
		// then override the output text encoding to UTF-8 - JSON is *always* UTF-8
		mSerializeOpts = serializeOpts.clone();
		serializeOpts.setOutputTextEncoding(kENCODING_UTF_8);

		PrintWriter writer = stdout.asPrintWriter(serializeOpts);

		parse( reader , writer, false );
		writer.flush();
		writer.close();

		// Consume input or we can get a Piped Close
		while( reader.hasNext() )
			reader.nextEvent();

		reader.close();
		inp.release();
		return 0;
	}

	/*
	 * Read events until one element has been handled or the enclosing element ends.
	 *
	 * bComma: print a "," before the value (not applied to xjson and file elements).
	 *
	 * Returns true when a start element was consumed and dispatched (unknown
	 * elements are skipped), false when an end element or the end of the stream
	 * was reached, and false after an xjson root has been fully processed.
	 */
	private boolean parse(XMLEventReader reader, PrintWriter writer, boolean bComma )
		throws XMLStreamException, CoreException, UnsupportedEncodingException, IOException, TransformException, SaxonApiException
	{
		mLevel++;

		while( reader.hasNext() ){
			XMLEvent e = reader.nextEvent();

			if( e.isStartElement() ){
				StartElement start = e.asStartElement();
				QName name = start.getName();

				if( name.equals(kELEM_XJSON)){
					if( mLevel != 1 )
						throw new UnexpectedException("XJSON element must be at document root");

					// Children become the new roots
					mLevel=0;
					while( parse( reader , writer , bComma ) )
						;
					return false;
				}
				else if( name.equals(kELEM_FILE)){
					if( ! writeFile( start , reader , writer )){
						// Balance the increment above, as every other exit path does
						mLevel--;
						return false ;
					}
				}
				else if( bComma )
					writer.print(",");

				if( name.equals(kELEM_OBJECT) )
					writeObject( start , reader , writer );
				else if( name.equals(kELEM_ARRAY))
					writeArray( start , reader , writer );
				else if( name.equals(kELEM_MEMBER) )
					writeMember( start , reader , writer );
				else if( name.equals(kELEM_NUMBER))
					writeNumber( start , reader , writer );
				else if( name.equals(kELEM_BOOLEAN))
					writeBoolean( start , reader , writer );
				else if( name.equals(kELEM_NULL) )
					writeNull( reader , writer );
				else if( name.equals(kELEM_STRING))
					writeString( start , reader , writer );
				else
					// Unknown element, or the remainder of a file element
					readToEnd(reader);

				mLevel--;
				return true ;
			}
			else if( e.isEndElement() ){
				mLevel--;
				return false ;
			}
		}

		mLevel--;
		return false ;
	}

	/*
	 * Handle a file element: parse one child value into the file named by the
	 * required "name" attribute instead of the current writer.
	 * Returns the result of the nested parse().
	 */
	private boolean writeFile(StartElement start, XMLEventReader reader, PrintWriter writer)
		throws UnsupportedEncodingException, IOException, XMLStreamException, CoreException, TransformException, SaxonApiException
	{
		Attribute aname = start.getAttributeByName(kATTR_NAME);
		if( aname == null )
			throw new InvalidArgumentException("Element FILE requries attribute name");
		String name = aname.getValue();

		PrintWriter w = getShell().getEnv().getOutput( getShell().getFile(name), false).asPrintWriter(mSerializeOpts);
		try {
			return parse(reader,w,false);
		} finally {
			// Close the file even when parsing fails
			w.close();
		}
	}

	/*
	 * Write a JSON string. The text comes from, in order of precedence:
	 *   1) the "value" attribute
	 *   2) the file named by the "src" attribute (read using the "encoding" attribute if present)
	 *   3) the element content, reserialized as HTML when the "html" attribute is true
	 * When the "unwrap" attribute is true the text is passed through unwrap() first.
	 */
	private void writeString(StartElement start, XMLEventReader reader, PrintWriter writer)
		throws XMLStreamException, UnsupportedEncodingException, FileNotFoundException, IOException, TransformException, SaxonApiException, CoreException
	{
		String value    = getAttr( start , kATTR_VALUE);
		String src      = getAttr( start , kATTR_SRC);
		String encoding = getAttr( start , kATTR_ENCODING);
		String unwrap   = getAttr( start , kATTR_UNWRAP);
		String html     = getAttr( start , kATTR_HTML);

		boolean bReadToEnd = true ;
		String chars ;
		if( value != null )
			chars = value ;
		else if( src != null )
			chars = readFile( src , encoding );
		else {
			// readString eats the close tag
			bReadToEnd = false ;
			chars = readString( reader , Util.parseBoolean(html));
		}

		// If Unwrap then trim off <html> and leading and trailing blanks.
		// Applies to the text actually being written, whatever its source.
		if( Util.parseBoolean(unwrap))
			chars = unwrap(chars);

		writer.print( JSONObject.quote(chars) );

		if( bReadToEnd )
			readToEnd(reader);
	}

	/*
	 * Read the content of a string element up to and including its end tag.
	 * When bHTML is true the content is parsed as XML and reserialized as HTML,
	 * otherwise only the character data is returned.
	 */
	private String readString(XMLEventReader reader, boolean bHTML )
		throws TransformException, XMLStreamException, SaxonApiException, IOException
	{
		byte[] bytes = bHTML ? serializeAsXML( reader ) : serializeAsString(reader);

		if( bHTML )
			return formatAsHtml( bytes );
		else
			return new String( bytes , kENCODING_UTF_8);
	}

	/*
	 * Unwrap a string by
	 * 1) Remove leading and trailing blanks
	 * 2) Remove any <html> and </html> (any case) from beginning and end
	 * 3) Remove leading and trailing blanks from the result
	 *
	 * Strings shorter than the tags are handled; null is returned unchanged.
	 */
	private String unwrap(String value)
	{
		final String open  = "<html>";
		final String close = "</html>";

		if( value == null )
			return null ;

		value = value.trim();

		// regionMatches returns false instead of throwing when the string is too short
		if( value.regionMatches(true, 0, open, 0, open.length()) )
			value = value.substring(open.length());

		int tail = value.length() - close.length();
		if( tail >= 0 && value.regionMatches(true, tail, close, 0, close.length()) )
			value = value.substring(0 , tail);

		return value.trim();
	}

	/*
	 * Read the whole content of an input port as text.
	 * Uses the given encoding, or the input text encoding of the serialization
	 * options when encoding is null.
	 */
	private String readFile(String file, String encoding)
		throws UnsupportedEncodingException, FileNotFoundException, IOException, CoreException
	{
		InputPort ip = getShell().getInputPort(file);
		try {
			Reader r = new InputStreamReader( ip.asInputStream(mSerializeOpts) ,
					encoding == null ? mSerializeOpts.getInputTextEncoding() : encoding );
			try {
				StringBuilder sb = new StringBuilder();
				char[] cbuf = new char[1000];
				int n;
				while( (n = r.read(cbuf)) > 0 )
					sb.append(cbuf, 0, n);
				return sb.toString();
			} finally {
				r.close();
			}
		} finally {
			ip.close();
		}
	}

	/*
	 * Return the value of an attribute or null if the attribute is absent
	 */
	private String getAttr(StartElement start, QName attr)
	{
		Attribute a = start.getAttributeByName(attr);
		if( a == null )
			return null;
		return a.getValue();
	}

	/*
	 * Write the JSON literal null and skip to the end of the element
	 */
	private void writeNull( XMLEventReader reader, PrintWriter writer) throws XMLStreamException
	{
		writer.print("null");
		readToEnd(reader);
	}

	/*
	 * Write a JSON boolean; the text is emitted exactly like a number
	 */
	private void writeBoolean(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException
	{
		writeNumber( start , reader , writer );
	}

	/*
	 * Write an unquoted literal taken from the "value" attribute or, when absent,
	 * from the leading character data of the element. The text is trimmed.
	 */
	private void writeNumber(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException
	{
		String chars ;
		Attribute v = start.getAttributeByName(kATTR_VALUE);
		if( v != null )
			chars = v.getValue();
		else
			chars = readChars( reader );

		chars = chars.trim();
		writer.print( chars );
		readToEnd(reader);
	}

	/*
	 * Write an object member as "name": followed by one parsed value.
	 * The "name" attribute is required.
	 */
	private void writeMember(StartElement start, XMLEventReader reader, PrintWriter writer)
		throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException
	{
		String name = getAttr( start , kATTR_NAME );
		if( name == null )
			throw new InvalidArgumentException("Element member requires attribute name");

		indent(writer);
		writer.print( JSONObject.quote(name) );
		writer.print(":");

		// If a value was parsed, skip to the end of the member element
		if( parse( reader , writer , false ))
			readToEnd(reader);
	}

	/*
	 * Write a JSON array: "[" , comma separated children , "]"
	 */
	private void writeArray(StartElement start, XMLEventReader reader, PrintWriter writer)
		throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException
	{
		indent(writer);
		writer.print("[");

		// Every child after the first is preceded by a comma
		boolean bFirst = true ;
		while( parse( reader , writer , ! bFirst ) )
			bFirst = false ;

		writer.print("]");
	}

	/*
	 * Write a JSON object: "{" , comma separated children , "}"
	 */
	private void writeObject(StartElement start, XMLEventReader reader, PrintWriter writer)
		throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException
	{
		indent(writer);
		writer.print("{");

		// Every child after the first is preceded by a comma
		boolean bFirst = true ;
		while( parse( reader , writer , ! bFirst ) )
			bFirst = false ;

		indent(writer);
		writer.print("}");
	}

	/*
	 * Serialize the body as HTML and return as a string
	 */
	private String formatAsHtml(byte[] xhtml) throws SaxonApiException, UnsupportedEncodingException
	{
		ByteArrayOutputStream bos = new ByteArrayOutputStream();

		Serializer ser = Shell.getProcessor().newSerializer();
		ser.setOutputProperty( Serializer.Property.OMIT_XML_DECLARATION, "yes" );
		ser.setOutputProperty( Serializer.Property.INDENT , "no");
		ser.setOutputProperty( Serializer.Property.METHOD, "html");
		ser.setOutputProperty( Serializer.Property.ENCODING, kENCODING_UTF_8);
		ser.setOutputStream(bos);

		Processor processor = Shell.getProcessor();
		DocumentBuilder builder = processor.newDocumentBuilder();
		builder.setWhitespaceStrippingPolicy(WhitespaceStrippingPolicy.ALL);

		XdmNode node = builder.build(new StreamSource( new ByteArrayInputStream(xhtml)));
		processor.writeXdmValue(node, ser);

		return bos.toString(kENCODING_UTF_8).trim();
	}

	/*
	 * Serialize as XML: copy every event up to (not including) the closing
	 * string element into a UTF-8 byte array
	 */
	private byte[] serializeAsXML(XMLEventReader reader ) throws XMLStreamException
	{
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		XMLOutputFactory fact = new OutputFactory();
		XMLEventWriter writer = fact.createXMLEventWriter(bos, kENCODING_UTF_8);

		while( reader.hasNext() ){
			XMLEvent event = reader.nextEvent();
			if( event.isEndElement() && event.asEndElement().getName().equals(kELEM_STRING))
				break ;
			writer.add(event);
		}

		writer.flush();
		writer.close();
		return bos.toByteArray();
	}

	/*
	 * Serialize as text: collect the character data of every event up to the
	 * closing string element as UTF-8 bytes; non character events are dropped
	 */
	private byte[] serializeAsString(XMLEventReader reader ) throws XMLStreamException, UnsupportedEncodingException, IOException
	{
		ByteArrayOutputStream bos = new ByteArrayOutputStream();

		while( reader.hasNext() ){
			XMLEvent event = reader.nextEvent();
			if( event.isEndElement() && event.asEndElement().getName().equals(kELEM_STRING))
				break ;
			if( event.isCharacters() )
				bos.write( event.asCharacters().getData().getBytes( kENCODING_UTF_8 ));
		}

		return bos.toByteArray();
	}

	/*
	 * When pretty printing, start a new line indented by one space per level
	 */
	private void indent(PrintWriter writer)
	{
		if( bIndent ){
			writer.println();
			for( int i = 0 ; i < mLevel ; i++ )
				writer.print(' ');
		}
	}

	/*
	 * Skip events up to and including the next end element
	 */
	private void readToEnd(XMLEventReader reader) throws XMLStreamException
	{
		while( reader.hasNext() && ! reader.peek().isEndElement() )
			reader.nextEvent();

		if( reader.hasNext())
			reader.nextEvent();
	}

	/*
	 * Concatenate the run of consecutive character events at the current position
	 */
	private String readChars(XMLEventReader reader) throws XMLStreamException
	{
		StringBuilder sb = new StringBuilder();
		while( reader.hasNext() && reader.peek().isCharacters() ){
			Characters ch = reader.nextEvent().asCharacters();
			sb.append( ch.getData() );
		}
		return sb.toString();
	}
}

//
//
//Copyright (C) 2008-2014 David A. Lee.
//
//The contents of this file are subject to the "Simplified BSD License" (the "License");
//you may not use this file except in compliance with the License. You may obtain a copy of the
//License at http://www.opensource.org/licenses/bsd-license.php
//
//Software distributed under the License is distributed on an "AS IS" basis,
//WITHOUT WARRANTY OF ANY KIND, either express or implied.
//See the License for the specific language governing rights and limitations under the License.
//
//The Original Code is: all this file.
//
//The Initial Developer of the Original Code is David A. Lee
//
//Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
//Contributor(s): none.
//