/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.kvstore.impl; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import org.kiji.annotations.ApiAudience; import org.kiji.mapreduce.kvstore.KeyValueStore; import org.kiji.mapreduce.kvstore.framework.KeyValueStoreConfiguration; import org.kiji.mapreduce.kvstore.lib.TextFileKeyValueStore; /** * Utility that parses an XML file that specifies KeyValueStore implementations * to bind in an application. */ @ApiAudience.Private public final class XmlKeyValueStoreParser { private static final Logger LOG = LoggerFactory.getLogger( XmlKeyValueStoreParser.class.getName()); /** The Configuration used by ReflectionUtils to instantiate new KeyValueStores. */ private Configuration mConf; /** * Creates a new instance. * * @param conf the Configuration to use to instantiate KeyValueStores. */ private XmlKeyValueStoreParser(Configuration conf) { mConf = conf; } /** * This method returns XmlKeyValueStoreParser instance. * * @param conf the Hadoop Configuration to use to initialize the KeyValueStores. * @return an XmlKeyValueStoreParser instance. */ public static XmlKeyValueStoreParser get(Configuration conf) { return new XmlKeyValueStoreParser(conf); } /** * Given an InputStream pointing to an opened resource that specifies a set of KeyValueStores * via XML, return the map of names to configured KeyValueStore instances. The caller is * responsible for closing the InputStream. * * <p>If an XML file tries to bind the same name to multiple stores, this will throw an * IOException.</p> * * @param xmlStream the InputStream pointing to the XML resource to load * @return a map from names to configured KeyValueStore instances. * @throws IOException if there is an error reading from the input stream or parsing the XML. */ public Map<String, KeyValueStore<?, ?>> loadStoresFromXml(InputStream xmlStream) throws IOException { Map<String, KeyValueStore<?, ?>> outMap = new HashMap<String, KeyValueStore<?, ?>>(); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(xmlStream); Element root = doc.getDocumentElement(); root.normalize(); if (!root.getNodeName().equals("stores")) { throw new IOException("Expected <stores> as root element."); } for (int i = 0; i < root.getChildNodes().getLength(); i++) { Node node = root.getChildNodes().item(i); if (node.getNodeType() != Node.ELEMENT_NODE) { continue; } if (node.getNodeName().equals("store")) { NamedNodeMap attrs = node.getAttributes(); Node nameNode = attrs.getNamedItem("name"); Node classNode = attrs.getNamedItem("class"); if (null == nameNode) { throw new IOException("Expected 'name' attribute in <store>"); } else if (null == classNode) { throw new IOException("Expected 'class' attribute in <store>"); } String storeName = nameNode.getNodeValue(); String storeClassStr = classNode.getNodeValue(); if (storeName.isEmpty()) { throw new IOException("Expected non-empty store name"); } else if (storeClassStr.isEmpty()) { throw new IOException("Expected non-empty store class"); } if (outMap.containsKey(storeName)) { throw new IOException("Store with name \"" + storeName + "\" is defined multiple times"); } // If the store class string does not contain any package specification, // auto-append org.kiji.mapreduce.kvstore. // TODO(KIJI-364): Make this compatible with user-written kvstores that live // in the default package. (Maybe try instantiating them first?) if (!storeClassStr.contains(".")) { // TODO: Make this a sane integration when these classes move to kiji-mr-lib storeClassStr = TextFileKeyValueStore.class.getPackage().getName() + "." + storeClassStr; } try { Class<?> userStoreClass = Class.forName(storeClassStr); if (!KeyValueStore.class.isAssignableFrom(userStoreClass)) { throw new IOException("Class " + userStoreClass.getName() + " does not extend KeyValueStore"); } @SuppressWarnings("rawtypes") Class<? extends KeyValueStore> storeClass = userStoreClass.asSubclass(KeyValueStore.class); LOG.info("Instantiating " + storeClass.getName() + " for store name " + storeName); // Create the store instance, and then configure it by // parsing the <store> element. KeyValueStore<?, ?> store = ReflectionUtils.newInstance(storeClass, mConf); configureFromXml(store, storeName, node); outMap.put(storeName, store); } catch (ClassNotFoundException cnfe) { throw new IOException("No such class: " + storeClassStr, cnfe); } } else { // We only expect <store> blocks in here. throw new IOException("Unexpected first-level element: " + node.getNodeName()); } } } catch (ParserConfigurationException pce) { throw new IOException(pce); } catch (SAXException se) { throw new IOException(se); } return outMap; } /** * Allows a store to define a mechanism for reading arbitrary serialized data from an * XML file specifying KeyValueStore definitions. * * <p>KeyValueStore definitions may be specified in an XML file applied by the user. * Each store is defined in a <store> element. KeyValueStore implementations should * expect the store element to contain a single child element called <configuration>. * This method reads this child element * as a KeyValueStoreConfiguration object, then initializes the argument KeyValueStore by passing * this to initFromConf().</p> * * <p>If no <configuration> element is present, the KeyValueStore is initialized * with an empty KeyValueStoreConfiguration.</p> * * @param store the store instance being configured. * @param storeName the name being bound to this store instance. * @param xmlNode the w3c DOM node representing the <store> element in the document. * @throws IOException if there is an error parsing the XML document node. */ private void configureFromXml(KeyValueStore<?, ?> store, String storeName, Node xmlNode) throws IOException { NodeList storeChildren = xmlNode.getChildNodes(); Node configurationNode = null; int numRealChildren = 0; for (int j = 0; j < storeChildren.getLength(); j++) { Node storeChild = storeChildren.item(j); if (storeChild.getNodeType() != Node.ELEMENT_NODE) { continue; } else { numRealChildren++; if (storeChild.getNodeName().equals("configuration")) { configurationNode = storeChild; } } } if (numRealChildren > 1) { // Don't recognize the XML schema here. throw new IOException("Unrecognized XML schema for store " + storeName + "; expected <configuration> element."); } else if (numRealChildren == 0) { assert null == configurationNode; LOG.warn("No <configuration> supplied for store " + storeName); store.initFromConf( KeyValueStoreConfiguration.createInConfiguration(new Configuration(mConf), 0)); } else if (null == configurationNode) { // Got a single child element, but it wasn't a <configuration>. throw new IOException("Unrecognized XML schema for store " + storeName + "; expected <configuration> element."); } else { assert numRealChildren == 1; // Configure the store by parsing the <configuration> element. // The keys in this returned storeConf are all wrapped in a per-kvstore namespace, // and do not contain any default keys. Configuration storeConf = parseConfiguration(configurationNode); // Create a "real" configuration with defaults, and add the elements of storeConf on top. Configuration conf = new Configuration(mConf); for (Map.Entry<String, String> entry : storeConf) { conf.set(entry.getKey(), entry.getValue()); } store.initFromConf(KeyValueStoreConfiguration.createInConfiguration(conf, 0)); } } /** * Given a DOM Node object that represents a <configuration> block * within a <store> object, reformat this as an xml document that can be parsed * by {@link org.apache.hadoop.conf.Configuration}, and then return a * Configuration instance to pass into a KeyValueStore object to instantiate. * * @param configNode a node representing a <configuration> element * in the DOM that is the root of the KeyValueStore's configuration. * @return a new Configuration containing the key-value pairs associated * with this node. * @throws IOException if there's an error processing the XML data. */ private Configuration parseConfiguration(Node configNode) throws IOException { if (null == configNode) { return null; } else if (!configNode.getNodeName().equals("configuration")) { throw new IOException("Expected <configuration> node, got " + configNode.getNodeName()); } try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.newDocument(); Element root = document.createElement("configuration"); document.appendChild(root); copyConfigNodes(root, configNode, document); TransformerFactory tf = TransformerFactory.newInstance(); Transformer transformer = tf.newTransformer(); transformer.setOutputProperty(OutputKeys.METHOD, "xml"); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); transformer.transform(new DOMSource(document), new StreamResult(outStream)); String confXmlText = outStream.toString("UTF-8"); // This only contains entries from the XML file component for this store; no defaults. Configuration conf = new Configuration(false); conf.addResource(new ByteArrayInputStream(confXmlText.getBytes("UTF-8"))); // Use KeyValueStoreConfiguration.fromConf() to remap these nodes into a namespace // for this individual key-value store, but return the underlying Configuration object. return KeyValueStoreConfiguration.fromConf(conf).getDelegate(); } catch (TransformerConfigurationException e) { throw new RuntimeException(e); } catch (TransformerException e) { throw new RuntimeException(e); } catch (ParserConfigurationException e) { throw new IOException(e); } } /** * Given a src and dest node that both represent <configuration> * elements, copy the <property> objects from src to dest. * * <p>The dest element is structurally modified by this operation. The * src argument is not modified.</p> * * <p>The <name> and <value> elements within each property * are copied across; other elements such as <final> are ignored.</p> * * <p>The text associated with each name is modified to include a "header" * that mirrors the KeyValueStore configuration serialization system; * the properties are placed in the sub-namespace of the configuration * associated with the '0' KeyValueStore being serialized to a Configuration * instance via {@link KeyValueStore#storeToConf(KeyValueStoreConfiguration)}.</p> * * @param src the input <configuration> element. * @param dest the target <configuration> element. * @param doc the target XML document. * @throws IOException if there is an error parsing the XML. */ private void copyConfigNodes(Element dest, Node src, Document doc) throws IOException { assert null != dest; assert null != src; NodeList children = src.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() != Node.ELEMENT_NODE) { continue; } if (child.getNodeName().equals("property")) { Node outProp = copyPropertyNode(child, doc); dest.appendChild(outProp); } else { throw new IOException("Unexpected element in configuration: " + child.getNodeName()); } } } /** * Deep copies a node representing an <property> element. * * <p>Modifies the <name> element to include a header that puts the * property in the "namespace" of the 0 element KeyValueStore in a Configuration.</p> * * @param propertyNode the input property to clone. * @param doc the output XML document we're building * @return a Node representing the same property in the "namespace" of the 0 * element KeyValueStore in a Configuration. * @throws IOException if there is an error parsing the input XML. */ private Node copyPropertyNode(Node propertyNode, Document doc) throws IOException { Element out = doc.createElement("property"); NodeList propChildren = propertyNode.getChildNodes(); for (int i = 0; i < propChildren.getLength(); i++) { Node child = propChildren.item(i); if (child.getNodeType() != Node.ELEMENT_NODE) { continue; } else if (child.getNodeName().equals("name")) { Element outName = doc.createElement("name"); String inName = getChildText(child); outName.appendChild(doc.createTextNode(inName)); out.appendChild(outName); } else if (child.getNodeName().equals("value")) { Element outVal = doc.createElement("value"); outVal.appendChild(doc.createTextNode(getChildText(child))); out.appendChild(outVal); } } return out; } /** * Given an element with a text child, return the string contents of that * text child. * * @param elem the input element node. * @return the string contents of the single text child element. * @throws IOException if the XML DOM under this element is not a single text node. */ private String getChildText(Node elem) throws IOException { assert elem.getNodeType() == Node.ELEMENT_NODE; NodeList children = elem.getChildNodes(); if (children.getLength() != 1) { throw new IOException("Expected exactly one text value under " + elem.getNodeName()); } return children.item(0).getNodeValue(); } }