package com.gorillalogic.monkeytalk.processor.report.detail;
import org.apache.commons.lang.StringEscapeUtils;
import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
public class XmlUtils {
/**
* Pretty-prints xml, supplied as a string.
* <p/>
* eg.
* <code>
* String formattedXml = XmlUtil.format("<tag><nested>hello</nested></tag>");
* </code>
*/
public static String format(String xml) {
try {
final InputSource src = new InputSource(new StringReader(xml));
final Node document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(src).getDocumentElement();
final Boolean keepDeclaration = Boolean.valueOf(xml.startsWith("<?xml"));
//May need this: System.setProperty(DOMImplementationRegistry.PROPERTY,"com.sun.org.apache.xerces.internal.dom.DOMImplementationSourceImpl");
final DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
final DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
final LSSerializer writer = impl.createLSSerializer();
writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE); // Set this to true if the output needs to be beautified.
writer.getDomConfig().setParameter("xml-declaration", keepDeclaration); // Set this to true if the declaration is needed to be outputted.
return writer.writeToString(document);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static String escapeXml(String str) {
return StringEscapeUtils.escapeXml(str);
}
public static void main(String[] args) {
String unformattedXml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage\n" +
" xmlns=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message\"\n" +
" xmlns:query=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/query\">\n" +
" <Query>\n" +
" <query:CategorySchemeWhere>\n" +
" \t\t\t\t\t <query:AgencyID>ECB\n\n\n\n</query:AgencyID>\n" +
" </query:CategorySchemeWhere>\n" +
" </Query>\n\n\n\n\n" +
"</QueryMessage>";
System.out.println(XmlUtils.format(unformattedXml));
}
/** prints "all-tag" XML with nested indentation but no other "niceties"
* took this approach to avoid format()'s re-ordering of attributes
*
*/
public static String passablePrint(String xml) {
if (xml==null || xml.length()==0) {
return xml;
}
StringBuilder sb = new StringBuilder();
String state="NOT_IN_TAG";
int level = 0;
String indent = " ";
String cdataOpenSig = "![CDATA[";
StringBuilder cdataOpen = new StringBuilder();
for (int i=0; i<xml.length(); i++) {
char c = xml.charAt(i);
if (state.equals("CDATA_CLOSE_PENDING")) {
sb.append(c);
if (c=='>') {
state="NOT_IN_TAG";
} else if (c!=']') {
state="IN_CDATA";
}
continue;
}
if (state.equals("CDATA_OPEN_PENDING")) {
cdataOpen.append(c);
if (!cdataOpenSig.equals(cdataOpen.toString())) {
sb.append(cdataOpen.toString());
cdataOpen.setLength(0);
state="IN_CDATA";
} else if (!(cdataOpenSig.startsWith(cdataOpen.toString()))) {
// not a match
sb.append(cdataOpen.toString());
cdataOpen.setLength(0);
state="IN_TAG";
}
continue;
}
if (state.equals("IN_CDATA")) {
sb.append(c);
if (c==']') {
state="CDATA_CLOSE_PENDING";
}
continue;
}
if (state.equals("TAG_PENDING")) {
if (c=='/') {
state="IN_CLOSE_TAG";
sb.append("\n");
level--;
for (int j=0; j<level; j++) {
sb.append(indent);
}
sb.append("</");
} else if (c=='!') {
state="CDATA_OPEN_PENDING";
sb.append('<');
cdataOpen.append(c);
continue;
} else {
state="IN_TAG";
sb.append("\n");
for (int j=0; j<level; j++) {
sb.append(indent);
}
level++;
sb.append("<");
sb.append(c);
}
continue;
}
if (c=='<') {
state="TAG_PENDING";
continue;
}
if (c=='>') {
sb.append(c);
state="NOT_IN_TAG";
continue;
}
if (state=="IN_TAG" || state=="IN_CLOSE_TAG") {
sb.append(c);
continue;
}
if (state=="NOT_IN_TAG") {
// skip
continue;
}
System.err.println("what about me, boss? '" + c + "' state is: " + state + " at offset " + i);
}
return sb.toString();
//return XmlUtils.format(xml);
}
}