/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.kr2rml;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.kr2rml.ErrorReport.Priority;
import edu.isi.karma.modeling.Namespaces;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.ontology.OntologyManager;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.Node;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Row;
import edu.isi.karma.rep.Table;
import edu.isi.karma.rep.Worksheet;
public class KR2RMLWorksheetRDFGenerator {
// Used to look up HNode (column) and Node (cell) objects by their ids.
private RepFactory factory;
// Worksheet whose data table rows are converted to RDF.
private Worksheet worksheet;
// Path of the file that generateRDF() opens when no writer was supplied; may be null.
private String outputFileName;
// Source of the prefix map that populatePrefixToNamespaceMap() inverts.
private OntologyManager ontMgr;
// Receives the messages describing predicates for which no RDF could be generated.
private ErrorReport errorReport;
// When true, literal statements are written with a column context URI appended and
// column provenance statements are written after all rows have been processed.
private boolean addColumnContextInformation;
// Mapping information consulted during generation: predicate-object maps per hNodeId,
// the triples map graph, and blank node prefix/column coverage.
private KR2RMLMappingAuxillaryInformation auxInfo;
// Prefix -> namespace, used to expand prefixed names into full URIs.
private Map<String, String> prefixToNamespaceMap;
// hNodeId -> context URI generated for that column (filled lazily by getColumnContextUri()).
private Map<String, String> hNodeToContextUriMap;
// Destination of all generated statements; created in generateRDF() if not supplied.
private PrintWriter outWriter;
private Logger logger = LoggerFactory.getLogger(KR2RMLWorksheetRDFGenerator.class);
// Prefix that marks a term as a blank node; such terms are not wrapped in angle brackets.
public static String BLANK_NODE_PREFIX = "_:";
/**
 * Creates a generator that is configured with an output file name instead of a writer.
 * The writer itself is only created when {@link #generateRDF(boolean)} runs.
 *
 * @param worksheet worksheet whose rows are converted to RDF
 * @param factory factory used to resolve column and cell ids
 * @param ontMgr ontology manager providing the namespace prefix map
 * @param outputFileName path of the output file; when null, generateRDF() writes to System.out
 * @param addColumnContextInformation whether column context information is generated
 * @param auxInfo auxiliary mapping information consulted during generation
 * @param errorReport report that receives generation failures
 */
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
        OntologyManager ontMgr, String outputFileName, boolean addColumnContextInformation,
        KR2RMLMappingAuxillaryInformation auxInfo, ErrorReport errorReport) {
    // Input data and lookup helpers
    this.worksheet = worksheet;
    this.factory = factory;
    this.ontMgr = ontMgr;
    this.auxInfo = auxInfo;

    // Output configuration
    this.outputFileName = outputFileName;
    this.addColumnContextInformation = addColumnContextInformation;
    this.errorReport = errorReport;

    // Internal lookup tables
    this.hNodeToContextUriMap = new HashMap<String, String>();
    this.prefixToNamespaceMap = new HashMap<String, String>();
    populatePrefixToNamespaceMap();
}
/**
 * Creates a generator that writes to a caller-supplied writer. No output file name is set.
 *
 * @param worksheet worksheet whose rows are converted to RDF
 * @param factory factory used to resolve column and cell ids
 * @param ontMgr ontology manager providing the namespace prefix map
 * @param writer writer that receives the generated statements
 * @param auxInfo auxiliary mapping information consulted during generation
 * @param errorReport report that receives generation failures
 * @param addColumnContextInformation whether column context information is generated
 */
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
        OntologyManager ontMgr, PrintWriter writer, KR2RMLMappingAuxillaryInformation auxInfo,
        ErrorReport errorReport, boolean addColumnContextInformation) {
    // Input data and lookup helpers
    this.worksheet = worksheet;
    this.factory = factory;
    this.ontMgr = ontMgr;
    this.auxInfo = auxInfo;

    // Output configuration
    this.outWriter = writer;
    this.addColumnContextInformation = addColumnContextInformation;
    this.errorReport = errorReport;

    // Internal lookup tables
    this.hNodeToContextUriMap = new HashMap<String, String>();
    this.prefixToNamespaceMap = new HashMap<String, String>();
    populatePrefixToNamespaceMap();
}
/**
 * Generates RDF for every top-level row of the worksheet and writes it to the output writer.
 * <p>
 * If no writer was supplied at construction time, one is created here: a UTF-8 file writer
 * when an output file name is set, otherwise a writer on {@code System.out}. Failures
 * collected for a row are added to the error report after that row has been processed.
 * If column context information is enabled, column provenance statements are written
 * after all rows.
 * <p>
 * The writer is always flushed at the end; it is closed only when requested.
 *
 * @param closeWriterAfterGeneration whether the output writer is closed when generation ends
 * @throws IOException if the output file cannot be opened
 */
public void generateRDF(boolean closeWriterAfterGeneration) throws IOException {
    try {
        // Prepare the output writer
        if (this.outWriter == null) {
            if (this.outputFileName != null) {
                this.outWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(this.outputFileName), "UTF-8")));
            } else {
                this.outWriter = new PrintWriter(System.out);
            }
        }

        // RDF Generation starts at the top level rows
        int rowsProcessed = 0;
        for (Row row : this.worksheet.getDataTable().getRows(0,
                this.worksheet.getDataTable().getNumRows())) {
            Set<String> rowTriplesSet = new HashSet<String>();
            Set<String> rowPredicatesCovered = new HashSet<String>();
            Set<String> predicatesSuccessful = new HashSet<String>();
            Map<String, ReportMessage> predicatesFailed = new HashMap<String, ReportMessage>();
            generateTriplesForRow(row, rowTriplesSet, rowPredicatesCovered, predicatesFailed,
                    predicatesSuccessful);
            outWriter.println();

            // Count the row first so that the logged number is the number of rows done
            rowsProcessed++;
            if (rowsProcessed % 2000 == 0) {
                logger.info("Done processing " + rowsProcessed + " rows");
            }

            for (ReportMessage errMsg : predicatesFailed.values()) {
                this.errorReport.addReportMessage(errMsg);
            }
        }

        // Generate column provenance information if required
        if (addColumnContextInformation) {
            generateColumnProvenanceInformation();
        }
    } finally {
        // The writer is still null if opening the output file failed; touching it here
        // would replace the original exception with a NullPointerException.
        if (outWriter != null) {
            // PrintWriter never throws on write; checkError() flushes the stream and
            // reports whether any write failed.
            if (outWriter.checkError()) {
                logger.error("An I/O error occurred while writing the generated RDF.");
            }
            if (closeWriterAfterGeneration) {
                // Closing the PrintWriter also closes the writers it wraps
                outWriter.close();
            }
        }
    }
    // An attempt to prevent an occasional error that occurs on Windows platform
    // The requested operation cannot be performed on a file with a user-mapped section open
    System.gc();
}
/**
 * Generates the statements for one row by visiting each of its cells. A cell that holds
 * a nested table is handled by recursing into every row of that table, each with a fresh
 * set of covered predicates; any other cell is handed to generateTriplesForCell().
 *
 * @param row row to process
 * @param existingTopRowTriples statements already written for the current top-level row
 * @param predicatesCovered ids of predicates already handled for this row
 * @param predicatesFailed failure messages keyed by predicate id
 * @param predicatesSuccessful ids of predicates for which a statement was generated
 */
public void generateTriplesForRow(Row row, Set<String> existingTopRowTriples,
        Set<String> predicatesCovered, Map<String, ReportMessage> predicatesFailed,
        Set<String> predicatesSuccessful) {
    for (Map.Entry<String, Node> cell : row.getNodesMap().entrySet()) {
        Node cellNode = cell.getValue();

        // Plain cell: generate its statements directly
        if (!cellNode.hasNestedTable()) {
            generateTriplesForCell(cellNode, existingTopRowTriples, cell.getKey(),
                    predicatesCovered, predicatesFailed, predicatesSuccessful);
            continue;
        }

        // Nested table: descend into each of its rows
        Table nestedTable = cellNode.getNestedTable();
        if (nestedTable == null) {
            continue;
        }
        for (Row nestedRow : nestedTable.getRows(0, nestedTable.getNumRows())) {
            Set<String> nestedRowPredicatesCovered = new HashSet<String>();
            generateTriplesForRow(nestedRow, existingTopRowTriples,
                    nestedRowPredicatesCovered, predicatesFailed, predicatesSuccessful);
        }
    }
}
/**
 * Generates the statements reachable from one cell. Starting from the triples maps of the
 * predicate-object maps linked to the cell's column, the triples map graph is walked
 * through a work queue: every not yet covered predicate of the current triples map is
 * generated, and the maps at both ends of each uncovered neighboring link are queued,
 * unless the current map's subject is the Steiner tree root.
 *
 * @param node cell being processed
 * @param existingTopRowTriples statements already written for the current top-level row
 * @param hNodeId id of the column the cell belongs to
 * @param predicatesCovered ids of predicates already handled
 * @param predicatesFailed failure messages keyed by predicate id
 * @param predicatesSuccessful ids of predicates for which a statement was generated
 */
public void generateTriplesForCell(Node node, Set<String> existingTopRowTriples,
        String hNodeId, Set<String> predicatesCovered,
        Map<String, ReportMessage> predicatesFailed, Set<String> predicatesSuccessful) {
    Map<String, String> columnValues = node.getColumnValues();
    List<PredicateObjectMap> linkedPoms = this.auxInfo.getHNodeIdToPredObjLinks().get(hNodeId);
    if (linkedPoms == null || linkedPoms.isEmpty()) {
        return;
    }

    // Seed the work queue with the triples maps that mention this column
    LinkedList<TriplesMap> pending = new LinkedList<TriplesMap>();
    for (PredicateObjectMap linkedPom : linkedPoms) {
        pending.add(linkedPom.getTriplesMap());
    }

    Set<String> visitedTriplesMapIds = new HashSet<String>();
    while (!pending.isEmpty()) {
        TriplesMap current = pending.removeFirst();

        // Generate properties for the triple maps
        for (PredicateObjectMap pom : current.getPredicateObjectMaps()) {
            if (predicatesCovered.contains(pom.getPredicate().getId())) {
                continue;
            }
            generatePropertyForPredObjMap(pom, columnValues, predicatesCovered,
                    existingTopRowTriples, hNodeId, predicatesFailed, predicatesSuccessful);
        }

        // Need to stop at the root
        boolean expandNeighbors = !current.getSubject().isSteinerTreeRootNode();

        for (TriplesMapLink link : this.auxInfo.getTriplesMapGraph()
                .getAllNeighboringTriplesMap(current.getId())) {
            if (!predicatesCovered.contains(
                    link.getPredicateObjectMapLink().getPredicate().getId())) {
                // Add the other triplesMap in queue to be processed later
                TriplesMap source = link.getSourceMap();
                if (!visitedTriplesMapIds.contains(source.getId()) && expandNeighbors) {
                    pending.add(source);
                }
                TriplesMap target = link.getTargetMap();
                if (!visitedTriplesMapIds.contains(target.getId()) && expandNeighbors) {
                    pending.add(target);
                }
            }
        }
        visitedTriplesMapIds.add(current.getId());
    }
}
/**
 * Generates and writes the statement for one predicate-object map.
 * <p>
 * The subject (including its type statements) and the predicate URI are generated first.
 * The object is either the subject URI of the referenced parent triples map (object
 * property) or a literal built from the object template (data property). A statement is
 * written only if it is not yet in {@code existingTopRowTriples}.
 * <p>
 * A missing column value is recorded in {@code predicatesFailed} unless the predicate
 * already succeeded; an empty cell value only produces a debug log entry. In both cases
 * nothing is written and the predicate is not marked as covered. Class-instance and
 * column-subclass link predicates are skipped after subject generation.
 *
 * @param pom predicate-object map to generate
 * @param columnValues values available for the current cell, keyed by hNodeId
 * @param predicatesCovered ids of predicates already handled; updated on success
 * @param existingTopRowTriples statements already written for the current top-level row
 * @param hNodeId id of the column of the cell being processed (used in failure messages)
 * @param predicatesFailed failure messages keyed by predicate id
 * @param predicatesSuccessful ids of predicates that succeeded; updated on success
 */
private void generatePropertyForPredObjMap(PredicateObjectMap pom,
        Map<String, String> columnValues, Set<String> predicatesCovered,
        Set<String> existingTopRowTriples, String hNodeId,
        Map<String, ReportMessage> predicatesFailed, Set<String> predicatesSuccessful) {
    SubjectMap subjMap = pom.getTriplesMap().getSubject();

    // Generate subject RDF
    String subjUri;
    try {
        subjUri = generateSubjectMapRDF(subjMap, existingTopRowTriples, columnValues);
    } catch (ValueNotFoundKarmaException ve) {
        recordPredicateFailure("Could not generate subject's RDF and URI for <i>predicate:",
                pom, subjMap, hNodeId, ve, predicatesFailed, predicatesSuccessful);
        return;
    } catch (NoValueFoundInNodeException e) {
        logger.debug("No value found in a node required to generate subject's RDF or URI.");
        return;
    }

    // Generate the predicate RDF
    String predicateUri;
    try {
        predicateUri = getTemplateTermSetPopulatedWithValues(columnValues,
                pom.getPredicate().getTemplate()).replace(" ", "");
        if (predicateUri.equals(Uris.CLASS_INSTANCE_LINK_URI)
                || predicateUri.equals(Uris.COLUMN_SUBCLASS_LINK_URI)) {
            return;
        }
    } catch (ValueNotFoundKarmaException ve) {
        recordPredicateFailure("Could not generate predicate's URI for <i>predicate:",
                pom, subjMap, hNodeId, ve, predicatesFailed, predicatesSuccessful);
        return;
    } catch (NoValueFoundInNodeException e) {
        logger.debug("No value found in a node required to generate predicate's URI.");
        return;
    }

    if (pom.getObject().hasRefObjectMap()) {
        // Object property: the object is the subject of the referenced triples map
        TriplesMap objPropertyObjectTriplesMap = pom.getObject().getRefObjectMap()
                .getParentTriplesMap();
        String objUri;
        try {
            objUri = generateSubjectMapRDF(objPropertyObjectTriplesMap.getSubject(),
                    existingTopRowTriples, columnValues);
        } catch (ValueNotFoundKarmaException ve) {
            recordPredicateFailure("Could not generate object's URI for <i>predicate:",
                    pom, subjMap, hNodeId, ve, predicatesFailed, predicatesSuccessful);
            return;
        } catch (NoValueFoundInNodeException e) {
            logger.debug("No value found in a node required to generate object's URI for a predicate.");
            return;
        }
        writeStatementIfNew(constructTripleWithURIObject(subjUri, predicateUri, objUri),
                existingTopRowTriples);
    } else {
        // Data property: the object is a literal built from the object template
        String value;
        try {
            value = getTemplateTermSetPopulatedWithValues(columnValues,
                    pom.getObject().getTemplate());
            if (value == null || value.trim().equals("")) {
                return;
            }
        } catch (ValueNotFoundKarmaException ve) {
            recordPredicateFailure("Could not retrieve value for the <i>predicate:",
                    pom, subjMap, hNodeId, ve, predicatesFailed, predicatesSuccessful);
            return;
        } catch (NoValueFoundInNodeException e) {
            logger.debug("No value found in a node required to generate value for a predicate.");
            return;
        }

        if (addColumnContextInformation) {
            // NOTE(review): when the object template is not a single column term nothing
            // is written here, yet the predicate is still marked as covered below --
            // confirm that dropping the literal is intended.
            TemplateTermSet templ = pom.getObject().getTemplate();
            if (templ.isSingleColumnTerm()) {
                String hNodeId_val = templ.getAllTerms().get(0).getTemplateTermValue();
                writeStatementIfNew(constructQuadWithLiteralObject(subjUri, predicateUri,
                        value, "", hNodeId_val), existingTopRowTriples);
            }
        } else {
            writeStatementIfNew(constructTripleWithLiteralObject(subjUri, predicateUri,
                    value, ""), existingTopRowTriples);
        }
    }

    // Success: mark the predicate as handled and drop any failure recorded earlier
    String predicateId = pom.getPredicate().getId();
    predicatesCovered.add(predicateId);
    predicatesSuccessful.add(predicateId);
    predicatesFailed.remove(predicateId);
}

/**
 * Records a failure message for the predicate of {@code pom}, unless that predicate has
 * already been generated successfully.
 */
private void recordPredicateFailure(String titlePrefix, PredicateObjectMap pom,
        SubjectMap subjMap, String hNodeId, ValueNotFoundKarmaException ve,
        Map<String, ReportMessage> predicatesFailed, Set<String> predicatesSuccessful) {
    // Angle brackets in the template are swapped for braces because the title is HTML
    String predicateTemplate = pom.getPredicate().getTemplate().toString()
            .replace("<", "{").replace(">", "}");
    ReportMessage msg = createReportMessage(titlePrefix + predicateTemplate
            + ", subject node: " + subjMap.getId() + "</i>", ve,
            this.factory.getHNode(hNodeId).getColumnName());
    String predicateId = pom.getPredicate().getId();
    if (!predicatesSuccessful.contains(predicateId)) {
        predicatesFailed.put(predicateId, msg);
    }
}

/**
 * Writes {@code statement} to the output and remembers it, unless it was already written
 * for the current top-level row.
 */
private void writeStatementIfNew(String statement, Set<String> existingTopRowTriples) {
    if (existingTopRowTriples.add(statement)) {
        outWriter.println(statement);
    }
}
/**
 * Builds a high priority report message whose description is the exception message
 * followed by the name of the column of the cell that was being processed.
 *
 * @param title title of the message
 * @param ve exception describing the missing value
 * @param cellColumnName name of the column of the cell being processed
 * @return the new report message
 */
private ReportMessage createReportMessage(String title, ValueNotFoundKarmaException ve,
        String cellColumnName) {
    String description = ve.getMessage() + " from column: <i>" + cellColumnName + "</i>";
    return new ReportMessage(title, description, Priority.high);
}
/**
 * Computes the URI term of a subject and writes an rdf:type statement for each of its
 * type templates that has not been written yet for the current top-level row.
 *
 * @param subjMap subject map to generate
 * @param existingTopRowTriples statements already written for the current top-level row
 * @param columnValues values available for the current cell, keyed by hNodeId
 * @return the expanded and normalized subject URI term
 * @throws ValueNotFoundKarmaException if a required column has no entry in columnValues
 * @throws NoValueFoundInNodeException if a required cell holds no value
 */
private String generateSubjectMapRDF(SubjectMap subjMap, Set<String> existingTopRowTriples,
        Map<String, String> columnValues)
        throws ValueNotFoundKarmaException, NoValueFoundInNodeException {
    // Generate URI for subject
    String rawSubjectUri = subjMap.isBlankNode()
            ? getBlankNodeUri(subjMap.getId(), columnValues)
            : getTemplateTermSetPopulatedWithValues(columnValues, subjMap.getTemplate());
    String subjectUri = getExpandedAndNormalizedUri(rawSubjectUri);

    // Generate triples for specifying the types
    for (TemplateTermSet typeTerm : subjMap.getRdfsType()) {
        String rawTypeUri = getTemplateTermSetPopulatedWithValues(columnValues, typeTerm);
        String typeTriple = constructTripleWithURIObject(subjectUri, Uris.RDF_TYPE_URI,
                getExpandedAndNormalizedUri(rawTypeUri));
        // add() reports whether the statement was new
        if (existingTopRowTriples.add(typeTriple)) {
            outWriter.println(typeTriple);
        }
    }
    return subjectUri;
}
/**
 * Builds a statement of the form "subject predicate object ." Only the predicate is
 * expanded and normalized here; subject and object are inserted exactly as given.
 *
 * @param subjUri subject term, used as is
 * @param predicateUri predicate URI, expanded and normalized before use
 * @param objectUri object term, used as is
 * @return the statement text
 */
private String constructTripleWithURIObject(String subjUri, String predicateUri, String objectUri) {
    StringBuilder statement = new StringBuilder();
    statement.append(subjUri).append(" ");
    statement.append(getExpandedAndNormalizedUri(predicateUri)).append(" ");
    statement.append(objectUri).append(" .");
    return statement.toString();
}
/**
 * Builds a statement whose object is a quoted literal. The value is escaped with
 * StringEscapeUtils.escapeJava(); a non-empty literal type is appended after "^^".
 *
 * @param subjUri subject term, used as is
 * @param predicateUri predicate URI, expanded and normalized before use
 * @param value literal value
 * @param literalType literal type to append; ignored when null or empty
 * @return the statement text
 */
private String constructTripleWithLiteralObject(String subjUri, String predicateUri, String value,
        String literalType) {
    // Use Apache Commons to escape the value
    String escapedValue = StringEscapeUtils.escapeJava(value);

    StringBuilder statement = new StringBuilder();
    statement.append(subjUri).append(" ");
    statement.append(getExpandedAndNormalizedUri(predicateUri));
    statement.append(" \"").append(escapedValue).append("\"");

    // Add the RDF literal type to the literal if present
    boolean hasLiteralType = literalType != null && !literalType.equals("");
    if (hasLiteralType) {
        statement.append("^^").append(literalType);
    }
    return statement.append(" .").toString();
}
/**
 * Builds a literal statement and turns it into a quad by replacing its final character
 * with the bracketed context URI of the given column followed by " .".
 *
 * @param subjUri subject term, used as is
 * @param predicateUri predicate URI, expanded and normalized before use
 * @param value literal value
 * @param literalType literal type; ignored when null or empty
 * @param valueHNodeId id of the column the value comes from
 * @return the quad text, or an empty string if the literal statement has at most two characters
 */
private String constructQuadWithLiteralObject(String subjUri, String predicateUri,
        String value, String literalType, String valueHNodeId) {
    String literalTriple = constructTripleWithLiteralObject(subjUri, predicateUri, value,
            literalType);
    String contextUri = getColumnContextUri(valueHNodeId);
    if (literalTriple.length() <= 2) {
        return "";
    }
    // Drop the final character of the triple and append the context term
    String withoutLastChar = literalTriple.substring(0, literalTriple.length() - 1);
    return withoutLastChar + "<" + contextUri + "> .";
}
/**
 * Builds the blank node identifier for a subject map: the blank node prefix, the
 * configured prefix of the subject map (with ':' replaced by '_'), and then "_" plus the
 * entry of columnValues for every column the blank node covers.
 *
 * @param subjMapid id of the subject map
 * @param columnValues values available for the current cell, keyed by hNodeId
 * @return the blank node identifier
 * @throws ValueNotFoundKarmaException if a covered column has no entry in columnValues
 */
private String getBlankNodeUri(String subjMapid, Map<String, String> columnValues)
        throws ValueNotFoundKarmaException {
    StringBuilder blankNodeId = new StringBuilder(BLANK_NODE_PREFIX);

    // Class node prefix
    String classPrefix = this.auxInfo.getBlankNodesUriPrefixMap().get(subjMapid);
    blankNodeId.append(classPrefix.replace(':', '_'));

    // Ids of the columns covered by this blank node
    List<String> coveredHNodeIds = this.auxInfo.getBlankNodesColumnCoverage().get(subjMapid);
    if (coveredHNodeIds == null || coveredHNodeIds.isEmpty()) {
        return blankNodeId.toString();
    }

    for (String coveredHNodeId : coveredHNodeIds) {
        if (!columnValues.containsKey(coveredHNodeId)) {
            String columnName = this.factory.getHNode(coveredHNodeId).getColumnName();
            throw new ValueNotFoundKarmaException("Could not retrieve value while constructing " +
                    "blank URI of column:" + columnName + ". ", coveredHNodeId);
        }
        blankNodeId.append("_").append(columnValues.get(coveredHNodeId));
    }
    return blankNodeId.toString();
}
/**
 * Concatenates the terms of a template. String terms contribute their text; column terms
 * contribute the value of the node that columnValues references for that column. A column
 * term whose node cannot be resolved by the factory contributes nothing.
 *
 * @param columnValues values available for the current cell, keyed by hNodeId
 * @param termSet template to populate
 * @return the populated template text
 * @throws ValueNotFoundKarmaException if columnValues has no entry for a column term
 * @throws NoValueFoundInNodeException if the node of a column term has a null or empty value
 */
public String getTemplateTermSetPopulatedWithValues(Map<String, String> columnValues,
        TemplateTermSet termSet) throws ValueNotFoundKarmaException, NoValueFoundInNodeException {
    StringBuilder populated = new StringBuilder();
    for (TemplateTerm term : termSet.getAllTerms()) {
        // String template term
        if (term instanceof StringTemplateTerm) {
            populated.append(term.getTemplateTermValue());
            continue;
        }
        if (!(term instanceof ColumnTemplateTerm)) {
            continue;
        }

        // Column template term
        String hNodeId = term.getTemplateTermValue();
        if (!columnValues.containsKey(hNodeId)) {
            String columnName = this.factory.getHNode(hNodeId).getColumnName();
            throw new ValueNotFoundKarmaException("Could not retrieve value of column: " +
                    columnName + ".", hNodeId);
        }

        Node node = factory.getNode(columnValues.get(hNodeId));
        if (node == null) {
            continue;
        }
        String cellValue = node.getValue().asString();
        if (cellValue == null || cellValue.equals("")) {
            throw new NoValueFoundInNodeException();
        }
        populated.append(cellValue);
    }
    return populated.toString();
}
/**
 * Turns a URI or prefixed name into the term that is written to the output.
 * <p>
 * A value that contains ':' and does not start with "http:" is treated as a prefixed
 * name: its prefix is replaced by the matching namespace, or an error is reported when no
 * namespace is known for it (the value is then kept unchanged). Blank node identifiers
 * are never treated as prefixed names. The result is normalized with
 * {@link #normalizeUri(String)} and wrapped in angle brackets unless it is a blank node.
 *
 * @param uri URI, prefixed name or blank node identifier
 * @return the term to write to the output
 */
private String getExpandedAndNormalizedUri(String uri) {
    // A blank node identifier starts with "_:"; running it through prefix expansion would
    // look up the prefix "_" and report a missing namespace for every blank node.
    boolean isBlankNode = uri.startsWith(BLANK_NODE_PREFIX);
    int colonIndex = uri.indexOf(':');

    // Check if the URI starts with a namespace prefix.
    if (!isBlankNode && colonIndex != -1 && !uri.startsWith("http:")) {
        // Replace the prefix with the namespace registered for it
        String prefix = uri.substring(0, colonIndex);
        String namespace = this.prefixToNamespaceMap.get(prefix);
        if (namespace == null || namespace.isEmpty()) {
            this.errorReport.createReportMessage("Error creating predicate's URI: " + uri,
                    "No namespace found for the prefix: " + prefix, Priority.high);
        } else {
            uri = namespace + uri.substring(colonIndex + 1);
        }
    }

    // Remove all unwanted characters
    uri = normalizeUri(uri);

    // Put angled brackets if required
    if (!uri.startsWith(BLANK_NODE_PREFIX)) {
        uri = "<" + uri + ">";
    }
    return uri;
}
/**
 * Removes every space from the given URI and replaces each comma, backtick and single
 * quote with an underscore.
 *
 * @param inputUri URI to normalize
 * @return the normalized URI
 */
public String normalizeUri(String inputUri) {
    String withoutSpaces = inputUri.replace(" ", "");
    return withoutSpaces.replaceAll("[,`']", "_");
}
/**
 * Fills prefixToNamespaceMap by inverting the ontology manager's prefix map, whose keys
 * are namespaces and whose values are prefixes.
 */
private void populatePrefixToNamespaceMap() {
    for (Map.Entry<String, String> namespaceToPrefix : this.ontMgr.getPrefixMap().entrySet()) {
        this.prefixToNamespaceMap.put(namespaceToPrefix.getValue(), namespaceToPrefix.getKey());
    }
}
/**
 * Returns the context URI of a column, creating and remembering it on first use. A new
 * URI consists of the Karma dev namespace, ten random alphanumeric characters, an
 * underscore and the hNodeId.
 *
 * @param hNodeId id of the column
 * @return the context URI of the column, without angle brackets
 */
private String getColumnContextUri (String hNodeId) {
    String knownUri = hNodeToContextUriMap.get(hNodeId);
    if (knownUri != null) {
        return knownUri;
    }
    String newUri = Namespaces.KARMA_DEV + RandomStringUtils.randomAlphanumeric(10)
            + "_" + hNodeId;
    hNodeToContextUriMap.put(hNodeId, newUri);
    return newUri;
}
/**
 * Writes the provenance statements for every column that has a context URI: the column's
 * own description and, for a column derived from another column, the description of the
 * original column plus a statement linking the two context URIs with
 * {@code Uris.PROV_WAS_DERIVED_FROM_URI}. Each column is described at most once.
 */
private void generateColumnProvenanceInformation() {
    // Iterate over a snapshot: describing an original column may register a new context
    // URI in hNodeToContextUriMap, and modifying the map while iterating over its key set
    // can raise a ConcurrentModificationException.
    List<String> hNodeIds = new ArrayList<String>(hNodeToContextUriMap.keySet());
    Set<String> describedHNodeIds = new HashSet<String>();

    for (String hNodeId : hNodeIds) {
        if (describedHNodeIds.add(hNodeId)) {
            for (String triple : getColumnContextTriples(hNodeId)) {
                outWriter.println(triple);
            }
        }

        // Generate wasDerivedFrom property if required
        HNode hNode = factory.getHNode(hNodeId);
        if (!hNode.isDerivedFromAnotherColumn()) {
            continue;
        }
        HNode originalHNode = factory.getHNode(hNode.getOriginalColumnHNodeId());
        if (originalHNode == null) {
            continue;
        }
        String originalHNodeId = originalHNode.getId();
        if (describedHNodeIds.add(originalHNodeId)) {
            for (String triple : getColumnContextTriples(originalHNodeId)) {
                outWriter.println(triple);
            }
        }

        // constructTripleWithURIObject() inserts subject and object as given, and the
        // stored context URIs are bare, so the angle brackets are added here.
        String derivedFromTriple = constructTripleWithURIObject(
                "<" + hNodeToContextUriMap.get(hNodeId) + ">", Uris.PROV_WAS_DERIVED_FROM_URI,
                "<" + getColumnContextUri(originalHNodeId) + ">");
        outWriter.println(derivedFromTriple);
    }
}
/**
 * Builds the statements that describe a column's context URI: one typing it with
 * {@code Uris.PROV_ENTITY_URI} and one giving the column name as its label.
 *
 * @param hNodeId id of the column
 * @return the two describing statements
 */
private List<String> getColumnContextTriples(String hNodeId) {
    List<String> colContextTriples = new ArrayList<String>();

    // The statement builders insert the subject as given and the stored context URI is
    // bare. It is bracketed the same way constructQuadWithLiteralObject() does, so the
    // subject here is character-for-character the context term used in the quads.
    String colUriTerm = "<" + getColumnContextUri(hNodeId) + ">";

    // Generate the type. The object is inserted as given as well, so it is expanded and
    // bracketed explicitly, like the type URIs of subjects.
    String typeTriple = constructTripleWithURIObject(colUriTerm, Uris.RDF_TYPE_URI,
            getExpandedAndNormalizedUri(Uris.PROV_ENTITY_URI));
    colContextTriples.add(typeTriple);

    // Generate the label
    HNode hNode = factory.getHNode(hNodeId);
    String labelTriple = constructTripleWithLiteralObject(colUriTerm, Uris.RDFS_LABEL_URI,
            hNode.getColumnName(), "");
    colContextTriples.add(labelTriple);
    return colContextTriples;
}
}
/**
 * Thrown when a value that is needed to generate RDF cannot be retrieved for a column.
 * Optionally carries the hNodeId of the column whose value was missing.
 */
class ValueNotFoundKarmaException extends Exception {
    private static final long serialVersionUID = 1L;

    /** hNodeId of the column whose value was missing; null when none was given. */
    private String offendingColumnHNodeId;

    /** Creates an exception without a description or an offending column. */
    public ValueNotFoundKarmaException() {
        super();
    }

    /**
     * Creates an exception with a description and the offending column.
     *
     * @param description description of the failure
     * @param offendingColumnHNodeId hNodeId of the column whose value was missing
     */
    public ValueNotFoundKarmaException(String description, String offendingColumnHNodeId) {
        super(description);
        this.offendingColumnHNodeId = offendingColumnHNodeId;
    }

    /**
     * @return hNodeId of the column whose value was missing, or null if none was given
     */
    public String getOffendingColumnHNodeId() {
        return offendingColumnHNodeId;
    }
}
/**
 * Thrown when a node that is needed to generate RDF holds a null or empty value.
 */
class NoValueFoundInNodeException extends Exception {
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a description. */
    public NoValueFoundInNodeException() {
        super();
    }

    /**
     * Creates an exception with a description.
     *
     * @param description description of the failure
     */
    public NoValueFoundInNodeException(String description) {
        super(description);
    }
}