/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.kvstore.impl;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.kiji.annotations.ApiAudience;
import org.kiji.mapreduce.kvstore.KeyValueStore;
import org.kiji.mapreduce.kvstore.framework.KeyValueStoreConfiguration;
import org.kiji.mapreduce.kvstore.lib.TextFileKeyValueStore;
/**
 * Utility that parses an XML file that specifies KeyValueStore implementations
 * to bind in an application.
 *
 * <p>The expected document has a {@code <stores>} root element whose element children are
 * all {@code <store name="..." class="...">} elements. Each {@code <store>} may contain at
 * most one child element, which must be a {@code <configuration>} element made up of
 * {@code <property>} entries with {@code <name>} and {@code <value>} children.</p>
 */
@ApiAudience.Private
public final class XmlKeyValueStoreParser {
  private static final Logger LOG = LoggerFactory.getLogger(XmlKeyValueStoreParser.class);

  /** The Configuration used by ReflectionUtils to instantiate new KeyValueStores. */
  private final Configuration mConf;

  /**
   * Creates a new instance.
   *
   * @param conf the Configuration to use to instantiate KeyValueStores.
   */
  private XmlKeyValueStoreParser(Configuration conf) {
    mConf = conf;
  }

  /**
   * Returns a new XmlKeyValueStoreParser bound to the given Configuration.
   *
   * @param conf the Hadoop Configuration to use to initialize the KeyValueStores.
   * @return an XmlKeyValueStoreParser instance.
   */
  public static XmlKeyValueStoreParser get(Configuration conf) {
    return new XmlKeyValueStoreParser(conf);
  }

  /**
   * Given an InputStream pointing to an opened resource that specifies a set of KeyValueStores
   * via XML, return the map of names to configured KeyValueStore instances. The caller is
   * responsible for closing the InputStream.
   *
   * <p>If an XML file tries to bind the same name to multiple stores, this will throw an
   * IOException.</p>
   *
   * <p>A store class name that contains no {@code '.'} is resolved relative to the package
   * of {@link TextFileKeyValueStore}.</p>
   *
   * @param xmlStream the InputStream pointing to the XML resource to load
   * @return a map from names to configured KeyValueStore instances.
   * @throws IOException if there is an error reading from the input stream or parsing the XML.
   */
  public Map<String, KeyValueStore<?, ?>> loadStoresFromXml(InputStream xmlStream)
      throws IOException {
    Map<String, KeyValueStore<?, ?>> outMap = new HashMap<String, KeyValueStore<?, ?>>();
    try {
      // NOTE(review): the parser is created with factory defaults. If store definitions can
      // come from an untrusted source, consider disabling DTDs / external entities here.
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      DocumentBuilder db = dbf.newDocumentBuilder();
      Document doc = db.parse(xmlStream);

      Element root = doc.getDocumentElement();
      root.normalize();
      if (!root.getNodeName().equals("stores")) {
        throw new IOException("Expected <stores> as root element.");
      }

      NodeList topLevelNodes = root.getChildNodes();
      for (int i = 0; i < topLevelNodes.getLength(); i++) {
        Node node = topLevelNodes.item(i);
        if (node.getNodeType() != Node.ELEMENT_NODE) {
          // Skip whitespace, comments, etc. between <store> elements.
          continue;
        }
        if (!node.getNodeName().equals("store")) {
          // We only expect <store> blocks in here.
          throw new IOException("Unexpected first-level element: " + node.getNodeName());
        }

        NamedNodeMap attrs = node.getAttributes();
        Node nameNode = attrs.getNamedItem("name");
        Node classNode = attrs.getNamedItem("class");
        if (null == nameNode) {
          throw new IOException("Expected 'name' attribute in <store>");
        } else if (null == classNode) {
          throw new IOException("Expected 'class' attribute in <store>");
        }

        String storeName = nameNode.getNodeValue();
        String storeClassStr = classNode.getNodeValue();
        if (storeName.isEmpty()) {
          throw new IOException("Expected non-empty store name");
        } else if (storeClassStr.isEmpty()) {
          throw new IOException("Expected non-empty store class");
        }

        if (outMap.containsKey(storeName)) {
          throw new IOException("Store with name \"" + storeName
              + "\" is defined multiple times");
        }

        // If the store class string does not contain any package specification,
        // auto-append org.kiji.mapreduce.kvstore.
        // TODO(KIJI-364): Make this compatible with user-written kvstores that live
        // in the default package. (Maybe try instantiating them first?)
        if (!storeClassStr.contains(".")) {
          // TODO: Make this a sane integration when these classes move to kiji-mr-lib
          storeClassStr = TextFileKeyValueStore.class.getPackage().getName()
              + "." + storeClassStr;
        }

        @SuppressWarnings("rawtypes")
        Class<? extends KeyValueStore> storeClass = loadStoreClass(storeClassStr);
        LOG.info("Instantiating {} for store name {}", storeClass.getName(), storeName);

        // Create the store instance, and then configure it by
        // parsing the <store> element.
        KeyValueStore<?, ?> store = ReflectionUtils.newInstance(storeClass, mConf);
        configureFromXml(store, storeName, node);
        outMap.put(storeName, store);
      }
    } catch (ParserConfigurationException pce) {
      throw new IOException(pce);
    } catch (SAXException se) {
      throw new IOException(se);
    }

    return outMap;
  }

  /**
   * Loads the named class and checks that it is a KeyValueStore implementation.
   *
   * @param className the fully-qualified name of the class to load.
   * @return the loaded class, typed as a KeyValueStore subclass.
   * @throws IOException if the class cannot be found or does not extend KeyValueStore.
   */
  @SuppressWarnings("rawtypes")
  private static Class<? extends KeyValueStore> loadStoreClass(String className)
      throws IOException {
    try {
      Class<?> userStoreClass = Class.forName(className);
      if (!KeyValueStore.class.isAssignableFrom(userStoreClass)) {
        throw new IOException("Class " + userStoreClass.getName()
            + " does not extend KeyValueStore");
      }
      return userStoreClass.asSubclass(KeyValueStore.class);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException("No such class: " + className, cnfe);
    }
  }

  /**
   * Initializes a store from the contents of its {@code <store>} element.
   *
   * <p>KeyValueStore definitions may be specified in an XML file applied by the user.
   * Each store is defined in a {@code <store>} element, which may contain a single child
   * element called {@code <configuration>}. This method reads that child element, layers
   * its properties on top of a copy of this parser's Configuration, and initializes the
   * argument KeyValueStore by passing the result to initFromConf().</p>
   *
   * <p>If the {@code <store>} has no child elements, a warning is logged and the store is
   * initialized from a copy of this parser's Configuration with no store-specific
   * properties added.</p>
   *
   * @param store the store instance being configured.
   * @param storeName the name being bound to this store instance.
   * @param xmlNode the w3c DOM node representing the {@code <store>} element in the document.
   * @throws IOException if the {@code <store>} has more than one child element, has a child
   *     element that is not {@code <configuration>}, or if the configuration cannot be parsed.
   */
  private void configureFromXml(KeyValueStore<?, ?> store, String storeName, Node xmlNode)
      throws IOException {
    NodeList storeChildren = xmlNode.getChildNodes();
    Node configurationNode = null;
    int numRealChildren = 0;
    for (int j = 0; j < storeChildren.getLength(); j++) {
      Node storeChild = storeChildren.item(j);
      if (storeChild.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }
      numRealChildren++;
      if (storeChild.getNodeName().equals("configuration")) {
        configurationNode = storeChild;
      }
    }

    if (numRealChildren == 0) {
      LOG.warn("No <configuration> supplied for store {}", storeName);
      store.initFromConf(
          KeyValueStoreConfiguration.createInConfiguration(new Configuration(mConf), 0));
      return;
    }

    if (numRealChildren > 1 || null == configurationNode) {
      // Either several child elements, or a single one that isn't a <configuration>:
      // don't recognize the XML schema here.
      throw new IOException("Unrecognized XML schema for store " + storeName
          + "; expected <configuration> element.");
    }

    // Configure the store by parsing the <configuration> element.
    // The keys in this returned storeConf are all wrapped in a per-kvstore namespace,
    // and do not contain any default keys.
    Configuration storeConf = parseConfiguration(configurationNode);

    // Create a "real" configuration with defaults, and add the elements of storeConf on top.
    Configuration conf = new Configuration(mConf);
    for (Map.Entry<String, String> entry : storeConf) {
      conf.set(entry.getKey(), entry.getValue());
    }
    store.initFromConf(KeyValueStoreConfiguration.createInConfiguration(conf, 0));
  }

  /**
   * Given a DOM Node object that represents a {@code <configuration>} block
   * within a {@code <store>} object, reformat this as an xml document that can be parsed
   * by {@link org.apache.hadoop.conf.Configuration}, and then return a
   * Configuration instance to pass into a KeyValueStore object to instantiate.
   *
   * @param configNode a node representing a {@code <configuration>} element
   *     in the DOM that is the root of the KeyValueStore's configuration.
   * @return a new Configuration containing the key-value pairs associated
   *     with this node, or null if {@code configNode} is null.
   * @throws IOException if there's an error processing the XML data.
   */
  private Configuration parseConfiguration(Node configNode) throws IOException {
    if (null == configNode) {
      return null;
    } else if (!configNode.getNodeName().equals("configuration")) {
      throw new IOException("Expected <configuration> node, got " + configNode.getNodeName());
    }

    try {
      // Build a standalone <configuration> document holding only name/value properties.
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = factory.newDocumentBuilder();
      Document document = builder.newDocument();
      Element root = document.createElement("configuration");
      document.appendChild(root);
      copyConfigNodes(root, configNode, document);

      // Serialize that document so Hadoop's Configuration can load it as a resource.
      TransformerFactory tf = TransformerFactory.newInstance();
      Transformer transformer = tf.newTransformer();
      transformer.setOutputProperty(OutputKeys.METHOD, "xml");
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");
      transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
      transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
      ByteArrayOutputStream outStream = new ByteArrayOutputStream();
      transformer.transform(new DOMSource(document), new StreamResult(outStream));

      // This only contains entries from the XML file component for this store; no defaults.
      Configuration conf = new Configuration(false);
      conf.addResource(new ByteArrayInputStream(outStream.toByteArray()));

      // Use KeyValueStoreConfiguration.fromConf() to remap these nodes into a namespace
      // for this individual key-value store, but return the underlying Configuration object.
      return KeyValueStoreConfiguration.fromConf(conf).getDelegate();
    } catch (TransformerConfigurationException e) {
      // Reported as IOException, like every other XML processing failure in this class.
      throw new IOException(e);
    } catch (TransformerException e) {
      throw new IOException(e);
    } catch (ParserConfigurationException e) {
      throw new IOException(e);
    }
  }

  /**
   * Given a src and dest node that both represent {@code <configuration>}
   * elements, copy the {@code <property>} objects from src to dest.
   *
   * <p>The dest element is structurally modified by this operation. The
   * src argument is not modified.</p>
   *
   * <p>The {@code <name>} and {@code <value>} elements within each property
   * are copied across unchanged; other elements such as {@code <final>} are ignored.
   * Non-element children of src (text, comments) are skipped.</p>
   *
   * @param dest the target {@code <configuration>} element.
   * @param src the input {@code <configuration>} element.
   * @param doc the target XML document.
   * @throws IOException if src contains a child element other than {@code <property>},
   *     or if a property cannot be copied.
   */
  private void copyConfigNodes(Element dest, Node src, Document doc) throws IOException {
    assert null != dest;
    assert null != src;

    NodeList children = src.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      Node child = children.item(i);
      if (child.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }
      if (!child.getNodeName().equals("property")) {
        throw new IOException("Unexpected element in configuration: " + child.getNodeName());
      }
      dest.appendChild(copyPropertyNode(child, doc));
    }
  }

  /**
   * Copies a node representing a {@code <property>} element into the output document.
   *
   * <p>Only the {@code <name>} and {@code <value>} child elements are carried over, each
   * with its text copied verbatim; any other child is dropped. The property name is not
   * rewritten here: placing it into a per-store namespace is done afterwards by
   * {@link #parseConfiguration(Node)}.</p>
   *
   * @param propertyNode the input property to clone.
   * @param doc the output XML document we're building
   * @return a new {@code <property>} element owned by {@code doc}.
   * @throws IOException if a {@code <name>} or {@code <value>} element does not contain
   *     exactly one text node.
   */
  private Node copyPropertyNode(Node propertyNode, Document doc) throws IOException {
    Element out = doc.createElement("property");
    NodeList propChildren = propertyNode.getChildNodes();
    for (int i = 0; i < propChildren.getLength(); i++) {
      Node child = propChildren.item(i);
      if (child.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }
      String childName = child.getNodeName();
      if (childName.equals("name") || childName.equals("value")) {
        Element copy = doc.createElement(childName);
        copy.appendChild(doc.createTextNode(getChildText(child)));
        out.appendChild(copy);
      }
    }
    return out;
  }

  /**
   * Given an element with a text child, return the string contents of that
   * text child.
   *
   * @param elem the input element node.
   * @return the string contents of the single text (or CDATA) child.
   * @throws IOException if the XML DOM under this element is not a single text node.
   */
  private String getChildText(Node elem) throws IOException {
    assert elem.getNodeType() == Node.ELEMENT_NODE;
    NodeList children = elem.getChildNodes();
    if (children.getLength() != 1) {
      throw new IOException("Expected exactly one text value under " + elem.getNodeName());
    }

    // A lone comment would otherwise be returned as the value, and a lone element
    // would yield null; only character data is acceptable here.
    Node onlyChild = children.item(0);
    short childType = onlyChild.getNodeType();
    if (childType != Node.TEXT_NODE && childType != Node.CDATA_SECTION_NODE) {
      throw new IOException("Expected exactly one text value under " + elem.getNodeName());
    }
    return onlyChild.getNodeValue();
  }
}