/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.kr2rml;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONException;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParseException;
import org.openrdf.sail.memory.MemoryStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.HNodePath;
import edu.isi.karma.rep.HTable;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Worksheet;
public class WorksheetR2RMLSesameModelParser {
// Worksheet whose headers are used to resolve column names found in the model
private Worksheet worksheet;
// Passed through to TemplateTermSetBuilder when templates/columns are parsed
private RepFactory factory;
// Sesame repository (backed by a MemoryStore) that the model file is loaded into
private Repository myRepository;
// Connection to myRepository; opened and closed by the constructor
private RepositoryConnection con;
// Value factory obtained from the connection; used to create the URIs that are queried
private ValueFactory f;
// Internal data structures required
// Subject maps keyed by the string value of their resource in the model
private Map<String, SubjectMap> subjectMapIndex;
// Triples maps keyed by the string value of their resource in the model
private Map<String, TriplesMap> triplesMapIndex;
// Collects triples-map links, hNodeId -> predicate-object-map links and blank node information
private KR2RMLMappingAuxillaryInformation auxInfo;
// Mapping that receives every TriplesMap built from the model
private R2RMLMapping r2rmlMapping;
// Next suffix handed out by getNewPredicateId()
private int predicateIdCounter = 1;
// Next suffix handed out by getNewObjectMapId()
private int objectMapCounter = 1;
private static Logger logger = LoggerFactory.getLogger(WorksheetR2RMLSesameModelParser.class);
/**
 * Loads a Turtle model file into an in-memory Sesame repository and builds
 * the R2RML mapping and its auxiliary information from it.
 * <p>
 * The repository and its connection are only needed while parsing, so they
 * are released before the constructor returns, including when loading or
 * parsing fails with an exception.
 *
 * @param worksheet worksheet whose headers are used to resolve column names
 * @param factory   factory handed to the template term set builder
 * @param modelFile Turtle file containing the model
 * @throws RepositoryException if the repository cannot be initialized, queried or closed
 * @throws RDFParseException   if the model file is not valid Turtle
 * @throws IOException         if the model file cannot be read
 * @throws JSONException       if a hierarchical column name in the model is not valid JSON
 */
public WorksheetR2RMLSesameModelParser(Worksheet worksheet, RepFactory factory, File modelFile)
        throws RepositoryException, RDFParseException, IOException, JSONException {
    this.worksheet = worksheet;
    this.factory = factory;
    this.r2rmlMapping = new R2RMLMapping();
    this.auxInfo = new KR2RMLMappingAuxillaryInformation();
    this.subjectMapIndex = new HashMap<String, SubjectMap>();
    this.triplesMapIndex = new HashMap<String, TriplesMap>();

    /** Initialize the repository **/
    myRepository = new SailRepository(new MemoryStore());
    myRepository.initialize();
    try {
        con = myRepository.getConnection();
        try {
            f = con.getValueFactory();
            con.add(modelFile, "", RDFFormat.TURTLE);

            // Generate TriplesMap for each InternalNode in the tree
            createSubjectMaps();
            // Identify the object property links
            createPredicateObjectMaps();
            // Calculate the nodes covered by each InternalNode
            calculateColumnNodesCoveredByBlankNodes();
        } finally {
            // Release the connection even when loading/parsing throws
            con.close();
        }
    } finally {
        // Always shut the in-memory store down so it is not leaked on failure
        myRepository.shutDown();
    }
}
/**
 * @return the auxiliary information object populated while the model was parsed
 */
public KR2RMLMappingAuxillaryInformation getAuxInfo() {
    return this.auxInfo;
}
/**
 * @return the mapping holding every TriplesMap created from the model
 */
public R2RMLMapping getR2rmlMapping() {
    return this.r2rmlMapping;
}
/**
 * Visits every resource typed as a triples map in the repository and adds
 * its predicate-object maps.
 *
 * @throws RepositoryException if the repository cannot be queried
 * @throws JSONException       propagated from the per-triples-map processing
 */
private void createPredicateObjectMaps()
        throws RepositoryException, JSONException {
    URI trTypeUri = f.createURI(Uris.RR_TRIPLESMAP_CLASS_URI);
    // Get all the triple maps
    RepositoryResult<Statement> tripleMapsStmts = con.getStatements(null, RDF.TYPE,
            trTypeUri, false);
    try {
        while (tripleMapsStmts.hasNext()) {
            Resource trMapRes = tripleMapsStmts.next().getSubject();
            // Add the predicate object maps
            addPredicateObjectMapsForTripleMap(trMapRes);
        }
    } finally {
        // Free the result even if processing a triples map throws
        tripleMapsStmts.close();
    }
}
/**
 * Creates a TriplesMap (with its SubjectMap) for every resource typed as a
 * triples map in the repository, indexes it by the resource's string value
 * and registers it with the R2RML mapping.
 *
 * @throws RepositoryException if the repository cannot be queried
 * @throws JSONException       propagated from the subject map processing
 */
private void createSubjectMaps() throws RepositoryException,
        JSONException {
    URI trTypeUri = f.createURI(Uris.RR_TRIPLESMAP_CLASS_URI);
    // Get all the triple maps
    RepositoryResult<Statement> tripleMapsStmts = con.getStatements(null, RDF.TYPE,
            trTypeUri, false);
    try {
        while (tripleMapsStmts.hasNext()) {
            Resource trMapRes = tripleMapsStmts.next().getSubject();
            SubjectMap subjMap = addSubjectMapForTripleMap(trMapRes);

            // Add the Triples map
            String trMapId = trMapRes.stringValue();
            TriplesMap trMap = new TriplesMap(trMapId, subjMap);
            this.triplesMapIndex.put(trMapId, trMap);
            this.r2rmlMapping.addTriplesMap(trMap);
        }
    } finally {
        // Free the result even if building a subject map throws
        tripleMapsStmts.close();
    }
}
/**
 * Builds a PredicateObjectMap for every predicate-object-map statement of
 * the given triples map resource and attaches it to the indexed TriplesMap.
 * <p>
 * The TriplesMap is looked up before any query is opened, so the early
 * return for an unknown resource does not leave an open result behind.
 *
 * @param trMapRes resource of the triples map in the model
 * @throws RepositoryException if the repository cannot be queried
 * @throws JSONException       propagated from template/column parsing
 */
private void addPredicateObjectMapsForTripleMap(Resource trMapRes)
        throws RepositoryException, JSONException {
    TriplesMap trMap = this.triplesMapIndex.get(trMapRes.stringValue());
    if (trMap == null) {
        logger.error("No Triples Map found for resource: " + trMapRes.stringValue());
        return;
    }

    URI predObjMapMapUri = f.createURI(Uris.RR_PRED_OBJ_MAP_URI);
    RepositoryResult<Statement> predObjStmts = con.getStatements(trMapRes, predObjMapMapUri,
            null, false);
    try {
        while (predObjStmts.hasNext()) {
            Resource pomBlankNode = (Resource) predObjStmts.next().getObject();

            // Create the PredicateObjectMap object for current POM
            PredicateObjectMap pom = new PredicateObjectMap(trMap);
            // Predicate is read before the object so the id counters advance in the same order as before
            pom.setPredicate(readPredicateForPom(pomBlankNode));
            pom.setObject(readObjectMapForPom(pomBlankNode, trMap, pom));
            trMap.addPredicateObjectMap(pom);
        }
    } finally {
        predObjStmts.close();
    }
}

/**
 * Reads the predicate of a predicate-object map node. When several predicate
 * statements exist the last one read wins; returns null when there is none.
 */
private Predicate readPredicateForPom(Resource pomBlankNode)
        throws RepositoryException, JSONException {
    URI predUri = f.createURI(Uris.RR_PREDICATE_URI);
    Predicate pred = null;
    RepositoryResult<Statement> predStmts = con.getStatements(pomBlankNode,
            predUri, null, false);
    try {
        while (predStmts.hasNext()) {
            Value predVal = predStmts.next().getObject();
            pred = new Predicate(predVal.stringValue() + "-" + getNewPredicateId());

            // Check if the predicate value is a URI or a literal (such as column name)
            if (predVal instanceof Resource) {
                pred.getTemplate().addTemplateTermToSet(
                        new StringTemplateTerm(predVal.stringValue(), true));
            } else {
                pred.setTemplate(TemplateTermSetBuilder.
                        constructTemplateTermSetFromR2rmlTemplateString(
                                predVal.stringValue(), worksheet, factory));
            }
        }
    } finally {
        predStmts.close();
    }
    return pred;
}

/**
 * Reads the object map of a predicate-object map node. A node typed as a
 * RefObjectMap yields an ObjectMap wrapping a RefObjectMap and records a
 * link between the two triples maps; any other node is read as a column
 * object map and registered in the hNodeId -> predicate-object-map index.
 * Returns null when no usable object map is found.
 */
private ObjectMap readObjectMapForPom(Resource pomBlankNode, TriplesMap trMap,
        PredicateObjectMap pom) throws RepositoryException, JSONException {
    URI objectMapUri = f.createURI(Uris.RR_OBJECTMAP_URI);
    URI columnUri = f.createURI(Uris.RR_COLUMN_URI);
    URI rfObjClassUri = f.createURI(Uris.RR_REF_OBJECT_MAP_URI);
    URI parentTriplesMapUri = f.createURI(Uris.RR_PARENT_TRIPLE_MAP_URI);

    ObjectMap objMap = null;
    RepositoryResult<Statement> objMapStmts = con.getStatements(pomBlankNode,
            objectMapUri, null, false);
    try {
        while (objMapStmts.hasNext()) {
            Resource objNode = (Resource) objMapStmts.next().getObject();

            /** Check if objNode is a RefObjectMap or a normal object map with column **/
            if (con.hasStatement(objNode, RDF.TYPE, rfObjClassUri, false)) {
                RepositoryResult<Statement> parentTripleMapStmts = con.getStatements(objNode,
                        parentTriplesMapUri, null, false);
                try {
                    while (parentTripleMapStmts.hasNext()) {
                        Resource parentTripleRes = (Resource) parentTripleMapStmts.next().getObject();
                        TriplesMap parentTM = this.triplesMapIndex.get(parentTripleRes.stringValue());
                        if (parentTM == null) {
                            // The referenced triples map was never indexed; do not create a link to null
                            logger.error("No parent Triples Map found for resource: "
                                    + parentTripleRes.stringValue());
                            continue;
                        }

                        // Create a RefObjectMap
                        RefObjectMap rfMap = new RefObjectMap(objNode.stringValue(), parentTM);
                        objMap = new ObjectMap(getNewObjectMapId(), rfMap);

                        // Add the link between triple maps in the auxInfo
                        TriplesMapLink link = new TriplesMapLink(trMap, parentTM, pom);
                        this.auxInfo.getTriplesMapGraph().addLink(link);
                    }
                } finally {
                    parentTripleMapStmts.close();
                }
            } else {
                // Tracked separately so an object map left over from a previous node is never re-linked
                ObjectMap columnObjMap = null;
                RepositoryResult<Statement> objMapColStmts = con.getStatements(objNode,
                        columnUri, null, false);
                try {
                    while (objMapColStmts.hasNext()) {
                        Value colVal = objMapColStmts.next().getObject();
                        columnObjMap = new ObjectMap(getNewObjectMapId(),
                                TemplateTermSetBuilder.constructTemplateTermSetFromR2rmlColumnString(
                                        colVal.stringValue(), worksheet, factory));
                    }
                } finally {
                    objMapColStmts.close();
                }

                if (columnObjMap == null) {
                    // Previously this path dereferenced a null object map
                    logger.error("No column found for object map: " + objNode.stringValue());
                } else {
                    objMap = columnObjMap;
                    // Check if anything needs to be added to the hNodeIdToPredicateObjectMap Map
                    addHNodeIdToPredObjectMapLink(columnObjMap, pom);
                }
            }
        }
    } finally {
        objMapStmts.close();
    }
    return objMap;
}
/**
 * Registers the predicate-object map under the hNode id of every column
 * term found in the object map's template.
 * <p>
 * Does nothing (apart from logging) when the object map or its template is
 * null, which happens when the model contains an object map without a column.
 *
 * @param objMap object map whose template terms are inspected; may be null
 * @param pom    predicate-object map to register for each column term
 */
private void addHNodeIdToPredObjectMapLink(ObjectMap objMap, PredicateObjectMap pom) {
    if (objMap == null || objMap.getTemplate() == null) {
        logger.warn("Object map or its template is missing; no hNode link added");
        return;
    }

    for (TemplateTerm term : objMap.getTemplate().getAllTerms()) {
        if (!(term instanceof ColumnTemplateTerm)) {
            continue;
        }
        String hNodeId = term.getTemplateTermValue();
        List<PredicateObjectMap> existingPomList = this.auxInfo.
                getHNodeIdToPredObjLinks().get(hNodeId);
        if (existingPomList == null) {
            // First predicate-object map seen for this hNode: start a new list
            existingPomList = new ArrayList<PredicateObjectMap>();
            this.auxInfo.getHNodeIdToPredObjLinks().put(hNodeId, existingPomList);
        }
        existingPomList.add(pom);
    }
}
/**
 * Hands out the current predicate id and advances the counter by one.
 *
 * @return the value of the counter before it was advanced
 */
private int getNewPredicateId() {
    int issuedId = predicateIdCounter;
    predicateIdCounter = issuedId + 1;
    return issuedId;
}
/**
 * Builds an object map id from the current counter value and advances the
 * counter by one.
 *
 * @return "ObjectMap" followed by the counter value before it was advanced
 */
private String getNewObjectMapId() {
    String issuedId = "ObjectMap" + objectMapCounter;
    objectMapCounter += 1;
    return issuedId;
}
/**
 * Builds the SubjectMap for a triples map resource: reads its template and
 * its rdf:type values, flags it when it is typed as the Steiner tree root
 * node, and indexes it by the subject map resource's string value.
 * <p>
 * When the triples map has several subject map statements the last one read
 * is returned (all of them are indexed).
 *
 * @param trMapRes resource of the triples map in the model
 * @return the subject map, or null when the triples map has none
 * @throws RepositoryException if the repository cannot be queried
 * @throws JSONException       propagated from template parsing
 */
private SubjectMap addSubjectMapForTripleMap(Resource trMapRes)
        throws RepositoryException, JSONException {
    SubjectMap subjMap = null;
    URI subjMapUri = f.createURI(Uris.RR_SUBJECTMAP_URI);
    URI templateUri = f.createURI(Uris.RR_TEMPLATE_URI);
    URI steinerTreeRootNodeUri = f.createURI(Uris.KM_STEINER_TREE_ROOT_NODE);

    RepositoryResult<Statement> subjMapStmts = con.getStatements(trMapRes, subjMapUri,
            null, false);
    try {
        while (subjMapStmts.hasNext()) {
            Resource subjMapBlankNode = (Resource) subjMapStmts.next().getObject();
            String subjMapId = subjMapBlankNode.stringValue();
            subjMap = new SubjectMap(subjMapId);
            this.subjectMapIndex.put(subjMapId, subjMap);

            // Get the subject template
            TemplateTermSet subjTemplTermSet = null;
            RepositoryResult<Statement> templates = con.getStatements(subjMapBlankNode,
                    templateUri, null, false);
            try {
                while (templates.hasNext()) {
                    String template = templates.next().getObject().stringValue();
                    // Goes through the class logger rather than stdout
                    logger.debug("Template: " + template);
                    subjTemplTermSet = TemplateTermSetBuilder.constructTemplateTermSetFromR2rmlTemplateString(
                            template, worksheet, factory);
                }
            } finally {
                templates.close();
            }
            subjMap.setTemplate(subjTemplTermSet);

            // Get the subject type
            RepositoryResult<Statement> rdfTypes = con.getStatements(subjMapBlankNode,
                    RDF.TYPE, null, false);
            try {
                while (rdfTypes.hasNext()) {
                    Value typeVal = rdfTypes.next().getObject();
                    TemplateTermSet typeTermSet;
                    if (typeVal instanceof Resource) {
                        // Skip the steiner tree root type
                        if (typeVal.stringValue().equals(Uris.KM_STEINER_TREE_ROOT_NODE)) {
                            continue;
                        }
                        typeTermSet = new TemplateTermSet();
                        typeTermSet.addTemplateTermToSet(
                                new StringTemplateTerm(typeVal.stringValue(), true));
                    } else {
                        // Non-resource type values are parsed as template strings
                        typeTermSet = TemplateTermSetBuilder.constructTemplateTermSetFromR2rmlTemplateString(
                                typeVal.stringValue(), worksheet, factory);
                    }
                    subjMap.addRdfsType(typeTermSet);
                }
            } finally {
                rdfTypes.close();
            }

            // Check if it is as the Steiner tree root node
            if (con.hasStatement(subjMapBlankNode, RDF.TYPE, steinerTreeRootNodeUri, false)) {
                subjMap.setAsSteinerTreeRootNode(true);
            }
        }
    } finally {
        subjMapStmts.close();
    }
    return subjMap;
}
/**
 * For every subject whose term type is the blank node type, marks the
 * indexed SubjectMap as a blank node and records in the auxiliary
 * information (a) the hNode ids of the columns it covers and (b) its blank
 * node prefix, when present.
 * <p>
 * A covered column is either a single column name, matched against the leaf
 * of every header path of the worksheet, or a JSON array of column names
 * describing a path through nested tables. Resources without an indexed
 * subject map and column paths that cannot be resolved are logged and
 * skipped.
 *
 * @throws RepositoryException if the repository cannot be queried
 * @throws JSONException       if a hierarchical column name is not a valid JSON array
 */
private void calculateColumnNodesCoveredByBlankNodes()
        throws RepositoryException, JSONException {
    URI termTypeUri = f.createURI(Uris.RR_TERM_TYPE_URI);
    URI blankNodeUri = f.createURI(Uris.RR_BLANK_NODE_URI);
    URI kmCoverColumnUri = f.createURI(Uris.KM_BLANK_NODE_COVERS_COLUMN_URI);
    URI kmBnodePrefixUri = f.createURI(Uris.KM_BLANK_NODE_PREFIX_URI);

    RepositoryResult<Statement> blankNodeSubjectMapStmts = con.getStatements(null, termTypeUri,
            blankNodeUri, false);
    try {
        List<HNodePath> allColPaths = worksheet.getHeaders().getAllPaths();

        while (blankNodeSubjectMapStmts.hasNext()) {
            Resource blankNodeSubjRes = blankNodeSubjectMapStmts.next().getSubject();
            SubjectMap subjMap = this.subjectMapIndex.get(blankNodeSubjRes.stringValue());
            if (subjMap == null) {
                // Nothing was indexed for this resource; skip it instead of dereferencing null
                logger.error("No Subject Map found for blank node resource: "
                        + blankNodeSubjRes.stringValue());
                continue;
            }
            subjMap.setAsBlankNode(true);

            // Get the column it covers
            List<String> columnsCoveredHnodeIds = new ArrayList<String>();
            RepositoryResult<Statement> coverColStmts = con.getStatements(blankNodeSubjRes,
                    kmCoverColumnUri, null, false);
            try {
                while (coverColStmts.hasNext()) {
                    String colName = coverColStmts.next().getObject().stringValue();
                    if (colName.startsWith("[") && colName.endsWith("]")) {
                        // If hierarchical column
                        logger.debug("Hierarchical column encountered!");
                        String hNodeId = findHNodeIdForHierarchicalColumn(colName);
                        if (hNodeId != null) {
                            columnsCoveredHnodeIds.add(hNodeId);
                        }
                    } else {
                        // Single level column
                        for (HNodePath path : allColPaths) {
                            HNode lastNode = path.getLeaf();
                            if (colName.equals(lastNode.getColumnName())) {
                                columnsCoveredHnodeIds.add(lastNode.getId());
                            }
                        }
                    }
                }
            } finally {
                coverColStmts.close();
            }
            logger.debug("Adding columns for blank node" + subjMap.getId() + " List: " + columnsCoveredHnodeIds);
            this.auxInfo.getBlankNodesColumnCoverage().put(subjMap.getId(), columnsCoveredHnodeIds);

            // Get the blank node prefix
            RepositoryResult<Statement> bnodePrefixStmts = con.getStatements(blankNodeSubjRes,
                    kmBnodePrefixUri, null, false);
            try {
                while (bnodePrefixStmts.hasNext()) {
                    this.auxInfo.getBlankNodesUriPrefixMap().put(subjMap.getId(),
                            bnodePrefixStmts.next().getObject().stringValue());
                }
            } finally {
                bnodePrefixStmts.close();
            }
        }
    } finally {
        blankNodeSubjectMapStmts.close();
    }
}

/**
 * Walks a JSON array of column names through the worksheet's nested header
 * tables and returns the id of the HNode named by the last element, or null
 * when the array is empty or any step of the path cannot be resolved.
 */
private String findHNodeIdForHierarchicalColumn(String columnPathJson) throws JSONException {
    JSONArray strArr = new JSONArray(columnPathJson);
    HTable hTable = worksheet.getHeaders();
    for (int i = 0; i < strArr.length(); i++) {
        String cName = (String) strArr.get(i);
        logger.debug("Column being normalized: " + cName);

        // Check the table before using it; a missing nested table means the path is broken
        HNode hNode = (hTable == null) ? null : hTable.getHNodeFromColumnName(cName);
        if (hNode == null) {
            logger.error("Error retrieving column: " + cName);
            return null;
        }
        if (i == strArr.length() - 1) { // Found!
            return hNode.getId();
        }
        hTable = hNode.getNestedTable();
    }
    return null;
}
/**
 * Small manual check of the template pattern: prints every match of the
 * pattern in a sample string, or "Only string!" when there is no match.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String sample = "http://id.americanart.si.edu/linkeddata/conceptscheme}";
    Pattern templatePattern = Pattern.compile("\\{\\\".*?\\\"\\}");
    Matcher hits = templatePattern.matcher(sample);

    boolean anyMatch = false;
    while (hits.find()) {
        anyMatch = true;
        System.out.println("Match: " + hits.group());
    }
    if (!anyMatch) {
        System.out.println("Only string!");
    }
}
}