/** * CloudGraph Community Edition (CE) License * * This is a community release of CloudGraph, a dual-license suite of * Service Data Object (SDO) 2.1 services designed for relational and * big-table style "cloud" databases, such as HBase and others. * This particular copy of the software is released under the * version 2 of the GNU General Public License. CloudGraph was developed by * TerraMeta Software, Inc. * * Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved. * * General License information can be found below. * * This distribution may include materials developed by third * parties. For license and attribution notices for these * materials, please refer to the documentation that accompanies * this distribution (see the "Licenses for Third-Party Components" * appendix) or view the online documentation at * <http://cloudgraph.org/licenses/>. */ package org.cloudgraph.mapreduce; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.Writable; import org.plasma.sdo.helper.PlasmaXMLHelper; import org.plasma.sdo.xml.DefaultOptions; import commonj.sdo.DataGraph; import commonj.sdo.helper.XMLDocument; /** * Allows data graphs to be consumable by Hadoop using XML * serialization under standard SDO provided mechanisms. A data graph of any * depth or complexity may be represented including graphs where the underlying * model contains instances of multiple inheritance. No XML Schema is required as the * serialized form is used as an internal representation only, and no XML Schema * validation is performed. * * <p> * For the write operation, the root URI, prepended by an integer representing its length, is written out first as this * is critical for de-serialization in some cases. Then the XML representation is written, also * prepended by an integer representing its length. * During the read operation, the root URI is first un-marshaled and then used as an option for XML * de-serialization. * </p> * * @author Scott Cinnamond * @since 0.5.8 * @see commonj.sdo.DataGraph * @see commonj.sdo.helper.XMLDocument */ public class GraphWritable implements Writable { private DataGraph dataGraph; public GraphWritable() { // for serialization only } public GraphWritable(DataGraph dataGraph) { this.dataGraph = dataGraph; } /** * Returns a * @return */ public DataGraph getDataGraph() { return dataGraph; } /** * The root URI, prepended by an integer representing its length, is written out first as this * is critical for de-serialization in some cases. Then the XML representation is written, also * prepended by an integer representing its length. */ @Override public void write(DataOutput out) throws IOException { String uri = this.dataGraph.getRootObject().getType().getURI(); byte[] uribytes = uri.getBytes(); out.writeInt(uribytes.length); out.write(uribytes); byte[] bytes = serializeGraph(this.dataGraph); out.writeInt(bytes.length); out.write(bytes); } /** * The root URI is first unmarshaled and then used as an option for XML * de-serialization. * (non-Javadoc) * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) */ @Override public void readFields(DataInput in) throws IOException { int length = in.readInt(); if (length == 0) { return; } byte[] buf = new byte[length]; in.readFully(buf); String uri = new String(buf); length = in.readInt(); if(length == 0) { return; } buf = new byte[length]; in.readFully(buf); this.dataGraph = deserializeGraph(buf, uri); } public String toXMLString() throws IOException { return new String(serializeGraph(this.dataGraph)); } private DataGraph deserializeGraph(byte[] buf, String uri) throws IOException { long before = System.currentTimeMillis(); ByteArrayInputStream is = new ByteArrayInputStream(buf); DefaultOptions options = new DefaultOptions(uri); options.setRootNamespacePrefix("ns1"); options.setValidate(false); // no XML schema for the doc necessary or present XMLDocument doc = PlasmaXMLHelper.INSTANCE.load(is, uri, options); long after = System.currentTimeMillis(); //System.out.println(GraphWritable.class.getSimpleName() + " deserialization: " + String.valueOf(after - before)); return doc.getRootObject().getDataGraph(); } private byte[] serializeGraph(DataGraph graph) throws IOException { long before = System.currentTimeMillis(); DefaultOptions options = new DefaultOptions(graph.getRootObject() .getType().getURI()); options.setRootNamespacePrefix("ns1"); //options.setPrettyPrint(false); XMLDocument doc = PlasmaXMLHelper.INSTANCE.createDocument( graph.getRootObject(), graph.getRootObject().getType().getURI(), null); ByteArrayOutputStream os = new ByteArrayOutputStream(); PlasmaXMLHelper.INSTANCE.save(doc, os, options); os.flush(); long after = System.currentTimeMillis(); //System.out.println(GraphWritable.class.getSimpleName() + " serialization: " + String.valueOf(after - before)); return os.toByteArray(); } }