/**
* CloudGraph Community Edition (CE) License
*
* This is a community release of CloudGraph, a dual-license suite of
* Service Data Object (SDO) 2.1 services designed for relational and
* big-table style "cloud" databases, such as HBase and others.
* This particular copy of the software is released under the
* version 2 of the GNU General Public License. CloudGraph was developed by
* TerraMeta Software, Inc.
*
* Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved.
*
* General License information can be found below.
*
* This distribution may include materials developed by third
* parties. For license and attribution notices for these
* materials, please refer to the documentation that accompanies
* this distribution (see the "Licenses for Third-Party Components"
* appendix) or view the online documentation at
* <http://cloudgraph.org/licenses/>.
*/
package org.cloudgraph.mapreduce;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
/**
* Supplies fully realized data {@link GraphWritable graphs} as the input value to MapReduce <code>Mapper</code>
* client subclasses, the input key being an offset into the processed file and the
* value being a {@link GraphWritable} assembled from a single SDO graph XML line read from the file.
* Supports detection of changes to the input data graph, and propagation of mutations
* to the underlying data store.
*
* The data graphs supplied to the code>Mapper</code> are ready to further modify or simply commit as is,
* for <code>Mapper</code> clients wishing to modify input graphs and
* commit changes within the map phase. See the below code sample based on the Wikipedia domain model
* which adds a page link to each input graph.
*<p>
*<pre>
*public class PageGraphImporter extends GraphXmlMapper<LongWritable, GraphWritable> {
* @Override
* public void map(LongWritable offset, GraphWritable graph, Context context) throws IOException {
*
* Page page = (Page)graph.getDataGraph().getRootObject();
* page.setPageTitle("New Page1");
*
* // commit above changes
* super.commit(row, graph, context);
* }
*}
*</pre>
*</p>
*
* <p>
* Data graphs of any size of complexity may be supplied to MapReduce jobs including graphs where the underlying
* domain model contains instances of multiple inheritance. The set of data graphs is provided to
* a MapReduce job using a <a href="http://plasma-sdo.org/org/plasma/query/Query.html">query</a>, typically
* supplied using {@link GraphMapReduceSetup}.
* </p>
* <p>
* Data graphs are assembled within a {@link GraphXmlRecordReader} based on the line oriented XML graph data read
* from an underlying file, and are passed to client {@link GraphXmlMapper} extensions.
* </p>
*
* @param <KEYOUT> the output key type
* @param <VALUEOUT> the output value type
*
* @see org.cloudgraph.mapreduce.GraphWritable
* @see org.cloudgraph.mapreduce.GraphXmlRecordReader
* @see org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup
*
* @author Scott Cinnamond
* @since 0.5.8
*/
public class GraphXmlMapper<KEYOUT, VALUEOUT>
extends Mapper<LongWritable, GraphWritable, KEYOUT, VALUEOUT> {
private static Log log = LogFactory.getLog(GraphXmlMapper.class);
public GraphXmlMapper() {
}
@Override
public void map(LongWritable row, GraphWritable graph,
Context context) throws IOException {
//no behavior
}
}