/**
* $Id: xpwd.java 21 2008-07-04 08:33:47Z daldei $
* $Date: 2008-07-04 04:33:47 -0400 (Fri, 04 Jul 2008) $
*
*/
package org.xmlsh.commands.internal;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.List;
import javanet.staxutils.OutputFactory;
import javax.xml.crypto.dsig.TransformException;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.WhitespaceStrippingPolicy;
import net.sf.saxon.s9api.XdmNode;
import org.json.JSONObject;
import org.xmlsh.core.CoreException;
import org.xmlsh.core.InputPort;
import org.xmlsh.core.InvalidArgumentException;
import org.xmlsh.core.Options;
import org.xmlsh.core.OutputPort;
import org.xmlsh.core.UnexpectedException;
import org.xmlsh.core.XCommand;
import org.xmlsh.core.XValue;
import org.xmlsh.sh.shell.SerializeOpts;
import org.xmlsh.sh.shell.Shell;
import org.xmlsh.util.Util;
/*
*
* Convert XML files to an CSV file
*
* Arguments
*
* -header Add a header row
*
*
*/
public class xml2json extends XCommand
{
private boolean bIndent = false ;
private int mLevel = 0; // indentation level
private SerializeOpts mSerializeOpts;
private static final String kENCODING_UTF_8 = "UTF-8";
private static final String kJXML_URI = "http://www.xmlsh.org/jxml";
private static final QName kATTR_ENCODING = new QName("encoding");
private static final QName kATTR_NAME = new QName("name");
private static final QName kATTR_VALUE = new QName("value");
private static final QName kATTR_SRC = new QName("src");
private static final QName kATTR_UNWRAP = new QName("unwrap");
private static final QName kATTR_HTML = new QName( "html" ); // A String formated as XHTML
private static final QName kELEM_XJSON = new QName(kJXML_URI, "xjson" );
private static final QName kELEM_FILE = new QName(kJXML_URI, "file" );
private static final QName kELEM_OBJECT = new QName(kJXML_URI, "object" ); // A JSON Object
private static final QName kELEM_MEMBER = new QName(kJXML_URI, "member" ); // A JSON Object Member
private static final QName kELEM_STRING = new QName(kJXML_URI, "string" ); // A JSON STRING
private static final QName kELEM_NUMBER = new QName(kJXML_URI, "number" ); // A JSON NUMBER
private static final QName kELEM_ARRAY = new QName(kJXML_URI, "array" ); // A JSON ARRAY
private static final QName kELEM_BOOLEAN = new QName(kJXML_URI, "boolean" ); // A JSON Literal (true,false)
private static final QName kELEM_NULL = new QName(kJXML_URI, "null" ); // A JSON Literal null
public int run( List<XValue> args ) throws Exception
{
Options opts = new Options("p=print",SerializeOpts.getOptionDefs());
opts.parse(args);
bIndent = opts.hasOpt("p");
args = opts.getRemainingArgs();
OutputPort stdout = getStdout();
InputPort inp = args.isEmpty() ? getStdin() : getInput( args.get(0) );
SerializeOpts serializeOpts = getSerializeOpts(opts);
XMLEventReader reader = inp.asXMLEventReader(serializeOpts);
// Override the text encoding to UTF-8 - JSON is *always* USTF8
mSerializeOpts =serializeOpts.clone();
serializeOpts.setOutputTextEncoding(kENCODING_UTF_8);
PrintWriter writer = stdout.asPrintWriter(serializeOpts);
parse( reader , writer, false );
writer.flush();
writer.close();
// Consume input or we can get a Piped Close
while( reader.hasNext() )
reader.nextEvent();
reader.close();
inp.release();
return 0;
}
private boolean parse(XMLEventReader reader, PrintWriter writer, boolean bComma ) throws XMLStreamException, CoreException, UnsupportedEncodingException, IOException, TransformException, SaxonApiException {
mLevel++;
while( reader.hasNext() ){
XMLEvent e = reader.nextEvent();
if( e.isStartElement() ){
StartElement start = e.asStartElement();
QName name = start.getName();
if( name.equals(kELEM_XJSON)){
if( mLevel != 1 )
throw new UnexpectedException("XJSON element must be at document root");
// Children become the new roots
mLevel=0;
while( parse( reader , writer , bComma ) )
;
return false;
}
else
if( name.equals(kELEM_FILE)){
if( ! writeFile( start , reader , writer ))
return false ;
}
else
if( bComma )
writer.print(",");
if( name.equals(kELEM_OBJECT) )
writeObject( start , reader , writer );
else
if( name.equals(kELEM_ARRAY))
writeArray( start , reader , writer );
else if(name.equals(kELEM_MEMBER) )
writeMember( start , reader , writer );
else if( name.equals(kELEM_NUMBER))
writeNumber( start , reader , writer );
else if( name.equals(kELEM_BOOLEAN))
writeBoolean( start , reader , writer );
else if( name.equals(kELEM_NULL) )
writeNull( reader , writer );
else if( name.equals(kELEM_STRING))
writeString( start , reader , writer );
else
readToEnd(reader);
mLevel--;
return true ;
}
else
if( e.isEndElement() ){
mLevel--;
return false ;
}
}
mLevel--;
return false ;
}
private boolean writeFile(StartElement start, XMLEventReader reader, PrintWriter writer) throws UnsupportedEncodingException, IOException, XMLStreamException, CoreException, TransformException, SaxonApiException {
Attribute aname = start.getAttributeByName(kATTR_NAME);
if( aname == null )
throw new InvalidArgumentException("Element FILE requries attribute name");
String name = aname.getValue();
//Attribute aencoding = start.getAttributeByName(new QName("encoding"));
//String encoding = (aencoding == null ? "UTF-8" : aencoding.getValue());
PrintWriter w = getShell().getEnv().getOutput( getShell().getFile(name), false).asPrintWriter(mSerializeOpts);
boolean ret = parse(reader,w,false);
w.close();
return ret ;
}
private void writeString(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException, UnsupportedEncodingException, FileNotFoundException, IOException, TransformException, SaxonApiException, CoreException {
String value = getAttr( start , kATTR_VALUE);
String src = getAttr( start , kATTR_SRC);
String encoding = getAttr( start, kATTR_ENCODING);
String unwrap = getAttr( start , kATTR_UNWRAP);
String html = getAttr( start , kATTR_HTML);
boolean bReadToEnd = true ;
String chars ;
if( value != null )
chars = value ;
else
if( src != null )
chars = readFile( src , encoding );
else
{
// readString eats the close tag
bReadToEnd = false ;
chars = readString( reader , Util.parseBoolean(html));
}
// If Unwrap then trim off <html> and leading and trailing blanks
if( Util.parseBoolean(unwrap)){
value = unwrap(value);
}
writer.print( JSONObject.quote(chars) );
if( bReadToEnd )
readToEnd(reader);
}
/*
* Parse an HTML element as XML and reserialize as HTML, store as a JSON string
*/
private String readString(XMLEventReader reader, boolean bHTML ) throws TransformException, XMLStreamException, SaxonApiException, IOException
{
byte[] bytes = bHTML ? serializeAsXML( reader ) : serializeAsString(reader);
// String xs = new String(xhtml,klENCODING_UTF_8);
if( bHTML )
return formatAsHtml( bytes );
else
return new String( bytes , kENCODING_UTF_8);
}
/*
* Unwrap a string by
* 1) Remove leading and trailing blanks
* 2) Remove any <html> (any case) from beginning and end
* 3) Remove leading and trailing blanks from the result
*/
private String unwrap(String value)
{
value = value.trim();
if( "<html>".equalsIgnoreCase(value.substring(0,6)) )
value = value.substring(6);
if( "</html>".equalsIgnoreCase(value.substring(value.length() - 7 )))
value = value.substring(0 , value.length() - 7 );
return value.trim();
}
private String readFile(String file, String encoding) throws UnsupportedEncodingException, FileNotFoundException, IOException, CoreException
{
InputPort ip = getShell().getInputPort(file);
Reader r =
new InputStreamReader(
ip.asInputStream(mSerializeOpts) ,
encoding == null ? mSerializeOpts.getInputTextEncoding() : encoding );
StringBuffer sb = new StringBuffer();
char cbuf[] = new char[1000];
int n;
while((n=r.read(cbuf)) > 0 )
sb.append(cbuf, 0, n);
r.close();
ip.close();
return sb.toString();
}
private String getAttr(StartElement start, QName attr) {
Attribute a = start.getAttributeByName(attr);
if(a == null )
return null;
return a.getValue();
}
private void writeNull( XMLEventReader reader, PrintWriter writer) throws XMLStreamException {
writer.print("null");
readToEnd(reader);
}
private void writeBoolean(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException {
writeNumber( start , reader , writer );
}
private void writeNumber(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException {
String chars ;
Attribute v = start.getAttributeByName(kATTR_VALUE);
if( v != null )
chars = v.getValue();
else
chars = readChars( reader );
chars = chars.trim();
writer.print( chars );
readToEnd(reader);
}
private void writeMember(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException {
indent(writer);
String name = start.getAttributeByName( new QName("name")).getValue();
writer.print( JSONObject.quote(name) );
writer.print(":");
if( parse( reader , writer ,false))
readToEnd(reader);
}
private void writeArray(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException {
indent(writer);
writer.print("[");
boolean bFirst = true ;
do {
if( ! parse( reader , writer , ! bFirst ) )
break ;
bFirst = false ;
}
while( true ) ;
writer.print("]");
}
private void writeObject(StartElement start, XMLEventReader reader, PrintWriter writer) throws XMLStreamException, UnsupportedEncodingException, CoreException, IOException, TransformException, SaxonApiException {
indent(writer);
writer.print("{");
boolean bFirst = true ;
do {
if( ! parse( reader , writer , ! bFirst ) )
break ;
bFirst = false ;
}
while( true ) ;
indent(writer);
writer.print("}");
}
/*
* Serialize the body as HTML and return as a string
*/
private String formatAsHtml(byte[] xhtml) throws SaxonApiException, UnsupportedEncodingException
{
ByteArrayOutputStream bos = new ByteArrayOutputStream();
Serializer ser = Shell.getProcessor().newSerializer();
ser.setOutputProperty( Serializer.Property.OMIT_XML_DECLARATION, "yes" );
ser.setOutputProperty(Serializer.Property.INDENT , "no");
ser.setOutputProperty(Serializer.Property.METHOD, "html");
ser.setOutputProperty(Serializer.Property.ENCODING, kENCODING_UTF_8);
ser.setOutputStream(bos);
Processor processor = Shell.getProcessor();
DocumentBuilder builder = processor.newDocumentBuilder();
builder.setWhitespaceStrippingPolicy(WhitespaceStrippingPolicy.ALL);
XdmNode node = builder.build(new StreamSource( new ByteArrayInputStream(xhtml)));
processor.writeXdmValue(node, ser);
return bos.toString(kENCODING_UTF_8).trim();
}
/*
* Serialize as XML
*/
private byte[] serializeAsXML(XMLEventReader reader ) throws XMLStreamException
{
ByteArrayOutputStream bos = new ByteArrayOutputStream();
XMLOutputFactory fact = new OutputFactory();
XMLEventWriter writer = fact.createXMLEventWriter(bos, kENCODING_UTF_8);
while( reader.hasNext() ){
XMLEvent event = reader.nextEvent();
if( event.isEndElement() && event.asEndElement().getName().equals(kELEM_STRING))
break ;
writer.add(event);
}
writer.flush();
writer.close();
return bos.toByteArray();
}
private byte[] serializeAsString(XMLEventReader reader ) throws XMLStreamException, UnsupportedEncodingException, IOException
{
ByteArrayOutputStream bos = new ByteArrayOutputStream();
while( reader.hasNext() ){
XMLEvent event = reader.nextEvent();
if( event.isEndElement() && event.asEndElement().getName().equals(kELEM_STRING))
break ;
if( event.isCharacters() )
bos.write( event.asCharacters().getData().getBytes( "UTF-8" ));
}
return bos.toByteArray();
}
private void indent(PrintWriter writer) {
if( bIndent ){
writer.println();
for( int i = 0 ; i < mLevel ; i++ )
writer.print(' ');
}
}
private void readToEnd(XMLEventReader reader) throws XMLStreamException {
while( reader.hasNext() && ! reader.peek().isEndElement() )
reader.nextEvent();
if( reader.hasNext())
reader.nextEvent();
}
private String readChars(XMLEventReader reader) throws XMLStreamException {
StringBuffer sb = new StringBuffer();
while( reader.hasNext() && reader.peek().isCharacters() ){
Characters ch = reader.nextEvent().asCharacters();
sb.append( ch.getData() );
}
return sb.toString();
}
}
//
//
//Copyright (C) 2008-2014 David A. Lee.
//
//The contents of this file are subject to the "Simplified BSD License" (the "License");
//you may not use this file except in compliance with the License. You may obtain a copy of the
//License at http://www.opensource.org/licenses/bsd-license.php
//
//Software distributed under the License is distributed on an "AS IS" basis,
//WITHOUT WARRANTY OF ANY KIND, either express or implied.
//See the License for the specific language governing rights and limitations under the License.
//
//The Original Code is: all this file.
//
//The Initial Developer of the Original Code is David A. Lee
//
//Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
//Contributor(s): none.
//