/*
* Copyright (C) 2015 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.morphlines.commons;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Configs;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.stdio.AbstractParser;
import org.w3c.dom.Document;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.google.common.collect.Maps;
import com.typesafe.config.Config;
//@formatter:off
/**
* The readXml command parses an InputStream from field specified by field parameter (_attachment_body by default)
* and uses XPath expressions to extract fields and add them into headers.
* Example:
* {
* readXml {
* field : source
* paths : {
* book1 : "/catalog/book[@id='bk101']/author"
* book2 : "/catalog/book[@id='bk102']/genre"
* }
* }
* }
*
* If paths field is empty (paths : { } ) whole xml will be parsed into a String with name _xml.
*/
//@formatter:on
public class ReadXmlBuilder implements CommandBuilder {
private static final String CONF_PATHS = "paths";
private static final String CONF_FIELD = "source";
public Collection<String> getNames() {
return Collections.singletonList("readXml");
}
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
return new ReadXml(this, config, parent, child, context);
}
private static final class ReadXml extends AbstractParser {
private final Map<String, String> stepMap;
private final XPath xpath;
private boolean all = false;
private String field;
private DocumentBuilder docBuilder;
protected ReadXml(CommandBuilder builder, Config config, Command parent, Command child,
MorphlineContext context) {
super(builder, config, parent, child, context);
xpath = XPathFactory.newInstance().newXPath();
stepMap = Maps.newHashMap();
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
docBuilder = factory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
Config paths = getConfigs().getConfig(config, CONF_PATHS);
for (Map.Entry<String, Object> entry : new Configs().getEntrySet(paths)) {
String fieldName = entry.getKey();
String path = entry.getValue().toString().trim();
stepMap.put(fieldName, path);
}
field = getConfigs().getString(config, CONF_FIELD, null);
if (stepMap.size() == 0) {
all = true;
}
LOG.debug("stepMap: {}", stepMap);
}
@Override
protected boolean doProcess(Record record, InputStream stream) throws IOException {
Document doc = null;
try {
if (field == null) {
doc = docBuilder.parse(stream);
} else if (record.get(field) != null) {
InputSource is = new InputSource(new StringReader(String.valueOf(record.get(
field).get(0))));
doc = docBuilder.parse(is);
}
} catch (SAXException e) {
LOG.error("Cannot parse body");
return false;
}
Record outputRecord = record.copy();
if (all) {
outputRecord.put("_xml", XMLtoString(doc));
if (!getChild().process(outputRecord)) {
return false;
}
} else {
for (Map.Entry<String, String> entry : stepMap.entrySet()) {
XPathExpression expr = null;
try {
expr = xpath.compile(entry.getValue());
String field = (String) expr.evaluate(doc, XPathConstants.STRING);
outputRecord.put(entry.getKey(), field);
} catch (XPathExpressionException e) {
LOG.error("Invalid XPATH expression -> " + expr);
return false;
}
}
if (!getChild().process(outputRecord)) {
return false;
}
}
return true;
}
public String XMLtoString(Document doc) {
DOMImplementationLS domImplementation = (DOMImplementationLS) doc.getImplementation();
LSSerializer lsSerializer = domImplementation.createLSSerializer();
return lsSerializer.writeToString(doc);
}
}
}