/* Copyright 2013 Jan Schnasse. * Licensed under the Eclipse Public License 1.0 */ package org.lobid.lodmill; import java.io.StringReader; import org.culturegraph.mf.framework.DefaultObjectPipe; import org.culturegraph.mf.framework.StreamReceiver; import org.culturegraph.mf.framework.annotations.Description; import org.culturegraph.mf.framework.annotations.In; import org.culturegraph.mf.framework.annotations.Out; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; /** * Decodes lines of ntriples * * @author Jan Schnasse * */ @Description("Decodes a record in ntriple format. " + "Creates a new entity for each statement. " + "The rdf subject is decoded as entity, " + "rdf predicates and rdf objects are decoded as literals (as key-value pairs).") @In(String.class) @Out(StreamReceiver.class) public final class NTripleDecoder extends DefaultObjectPipe<String, StreamReceiver> { private int count = 0; @Override public void process(final String str) { getReceiver().startRecord(String.valueOf(++count)); Model model = ModelFactory.createDefaultModel(); model.read(new StringReader(str), "test:uri:" + count, "N-TRIPLE"); StmtIterator iterator = model.listStatements(); while (iterator.hasNext()) { Statement stm = iterator.next(); Resource subject = stm.getSubject(); Property predicate = stm.getPredicate(); RDFNode object = stm.getObject(); getReceiver().startEntity(subject.toString()); if (object.isLiteral()) { getReceiver().literal(predicate.toString(), object.asLiteral().getString()); } else { getReceiver().literal(predicate.toString(), object.toString()); } getReceiver().endEntity(); } getReceiver().endRecord(); } }