/* Copyright 2013 Fabian Steeg, Pascal Christoph.
* Licensed under the Eclipse Public License 1.0 */
package org.lobid.lodmill;
import java.io.StringWriter;
import java.util.Stack;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.culturegraph.mf.framework.StreamReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.util.ResourceUtils;
/**
* Treats Literals, URIs and Blank Nodes. The latter will be invoked by using
* the <entity> element in the morph file. Output are N-Triples.
*
* @author Fabian Steeg, Pascal Christoph
*/
@Description("Encode a stream as N-Triples")
@In(StreamReceiver.class)
@Out(String.class)
public class PipeEncodeTriples extends AbstractGraphPipeEncoder {
Model model;
Stack<Resource> resources;
private final AtomicInteger ATOMIC_INT = new AtomicInteger();
// dummy subject to store data even if the subject is unknown at first
final static String DUMMY_SUBJECT = "dummy_subject";
final static String HTTP = "http";
final static String URN = "urn";
private boolean fixSubject = false;
private static final Logger LOG =
LoggerFactory.getLogger(PipeEncodeTriples.class);
private boolean storeUrnAsUri = false;
/**
* Sets the default temporary subject.
*/
public PipeEncodeTriples() {
subject = DUMMY_SUBJECT;
}
/**
* Allows to define the subject from outside, e.g. from a flux file.
*
* @param subject set the subject for each triple
*/
public void setSubject(final String subject) {
this.subject = subject;
fixSubject = true;
}
/**
* Allows to store URN's as URI's- Default is to store them as literals.
*
* @param storeUrnAsUri set if urn's should be stored as URIs
*/
public void setStoreUrnAsUri(final String storeUrnAsUri) {
this.storeUrnAsUri = Boolean.parseBoolean(storeUrnAsUri);
}
@Override
public void startRecord(final String identifier) {
model = ModelFactory.createDefaultModel();
resources = new Stack<Resource>();
if (!fixSubject) {
subject = DUMMY_SUBJECT;
}
resources.push(model.createResource(subject));
}
@Override
public void literal(final String name, final String value) {
if (value == null)
return;
if (name.equalsIgnoreCase(SUBJECT_NAME)) {
subject = value;
try {
if (resources.peek().hasURI((DUMMY_SUBJECT))) {
ResourceUtils.renameResource(model.getResource(DUMMY_SUBJECT),
subject);
resources.push(model.createResource(subject));
} else {
resources.push(model.createResource(subject));
}
} catch (Exception e) {
LOG.warn("Problem with name=" + name + " value=" + value, e);
}
} else if (name.startsWith(HTTP)) {
try {
final Property prop = model.createProperty(name);
if (isUriWithScheme(value) && ((value.startsWith(URN) && storeUrnAsUri)
|| value.startsWith(HTTP) || value.startsWith("mailto"))) {
resources.peek().addProperty(prop,
model.asRDFNode(NodeFactory.createURI(value)));
} else {
resources.peek().addProperty(prop, value);
}
} catch (Exception e) {
LOG.warn("Problem with name=" + name + " value=" + value, e);
}
}
}
Resource makeBnode(final String value) {
final Resource res = model.createResource(
new AnonId("_:" + value + ATOMIC_INT.getAndIncrement()));
model.add(resources.peek(), model.createProperty(value), res);
return res;
}
void enterBnode(final Resource res) {
this.resources.push(res);
}
@Override
public void startEntity(final String name) {
enterBnode(makeBnode(name));
}
@Override
public void endEntity() {
this.resources.pop();
}
@Override
public void endRecord() {
// insert subject now if it was not at the beginning of the record
final StringWriter tripleWriter = new StringWriter();
RDFDataMgr.write(tripleWriter, model, Lang.NTRIPLES);
getReceiver().process(tripleWriter.toString());
}
}