/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.controller.command.reconciliation;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.http.client.ClientProtocolException;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import edu.isi.karma.controller.command.Command;
import edu.isi.karma.controller.command.CommandException;
import edu.isi.karma.controller.update.ErrorUpdate;
import edu.isi.karma.controller.update.InfoUpdate;
import edu.isi.karma.controller.update.SVGAlignmentUpdate_ForceKarmaLayout;
import edu.isi.karma.controller.update.SemanticTypesUpdate;
import edu.isi.karma.controller.update.UpdateContainer;
import edu.isi.karma.er.helper.TripleStoreUtil;
import edu.isi.karma.kr2rml.ErrorReport;
import edu.isi.karma.kr2rml.KR2RMLMappingGenerator;
import edu.isi.karma.kr2rml.KR2RMLWorksheetRDFGenerator;
import edu.isi.karma.kr2rml.ReportMessage;
import edu.isi.karma.kr2rml.TriplesMap;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.alignment.Alignment;
import edu.isi.karma.modeling.alignment.AlignmentManager;
import edu.isi.karma.modeling.semantictypes.SemanticTypeUtil;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.HNodePath;
import edu.isi.karma.rep.HTable;
import edu.isi.karma.rep.Node;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Row;
import edu.isi.karma.rep.Table;
import edu.isi.karma.rep.Worksheet;
import edu.isi.karma.rep.alignment.ClassInstanceLink;
import edu.isi.karma.rep.alignment.ColumnNode;
import edu.isi.karma.rep.alignment.DataPropertyLink;
import edu.isi.karma.rep.alignment.Link;
import edu.isi.karma.rep.alignment.LinkKeyInfo;
import edu.isi.karma.util.HTTPUtil;
import edu.isi.karma.view.VWorkspace;
import edu.isi.karma.view.ViewPreferences;
public class InvokeRubenReconciliationService extends Command {
private final String alignmentNodeId;
private final String vWorksheetId;
private String rdfPrefix;
private String rdfNamespace;
private final String reconciliationServiceUrl = "http://entities.restdesc.org/disambiguations/";
public InvokeRubenReconciliationService(String id, String alignmentNodeId, String vWorksheetId) {
super(id);
this.alignmentNodeId = alignmentNodeId;
this.vWorksheetId = vWorksheetId;
addTag(CommandTag.Transformation);
}
@Override
public String getCommandName() {
return this.getClass().getSimpleName();
}
@Override
public String getTitle() {
return "Invoke Reconciliation";
}
@Override
public String getDescription() {
return "";
}
@Override
public CommandType getCommandType() {
return CommandType.notUndoable;
}
@Override
public UpdateContainer doIt(VWorkspace vWorkspace) throws CommandException {
RepFactory f = vWorkspace.getRepFactory();
Worksheet worksheet = vWorkspace.getViewFactory().getVWorksheet(vWorksheetId).getWorksheet();
Alignment alignment = AlignmentManager.Instance().getAlignment(
AlignmentManager.Instance().constructAlignmentId(vWorkspace.getWorkspace().getId(),
vWorksheetId));
// Set the prefix and namespace to be used while generating RDF
fetchRdfPrefixAndNamespaceFromPreferences(vWorkspace);
// Generate the KR2RML data structures for the RDF generation
final ErrorReport errorReport = new ErrorReport();
KR2RMLMappingGenerator mappingGen = new KR2RMLMappingGenerator(
vWorkspace.getWorkspace().getOntologyManager(), alignment,
worksheet.getSemanticTypes(), rdfPrefix, rdfNamespace,
true, errorReport);
TriplesMap trMap = mappingGen.getTriplesMapForNodeId(alignmentNodeId);
// Remove the triple maps and info that we don't need
// filterTripleMapsAndAuxillaryInformation();
// Get the column that contains the key for the internal node
String keyColumnHNodeId = getKeyColumnHNodeIdForAlignmentNode(alignment);
if (keyColumnHNodeId == null) {
return new UpdateContainer(new ErrorUpdate("Please assign a column as a key for the class"));
}
// Loop through each row that contains the column containing key
HNode hNode = f.getHNode(keyColumnHNodeId);
HNodePath path = hNode.getHNodePath(f);
Collection<Node> nodes = new ArrayList<Node>();
worksheet.getDataTable().collectNodes(path, nodes);
Map<Row, String> rowToUriMap = new HashMap<Row, String>();
// For through each row, generate the RDF, and invoke the service
try {
int count = 1;
for (Node node:nodes) {
if (count % 5 ==0) {
System.out.println("Done invoking linking service for " + count + " rows");
}
Row row = node.getBelongsToRow();
// Generate the RDF
StringWriter outRdf = new StringWriter();
PrintWriter pw = new PrintWriter(outRdf);
KR2RMLWorksheetRDFGenerator rdfGen = new KR2RMLWorksheetRDFGenerator(worksheet,
vWorkspace.getRepFactory(), vWorkspace.getWorkspace().getOntologyManager(),
pw, mappingGen.getMappingAuxillaryInformation(), errorReport, false);
rdfGen.generateTriplesForRow(row, new HashSet<String>(), new HashSet<String>(),
new HashMap<String, ReportMessage>(), new HashSet<String>());
pw.flush();
String rdf = outRdf.toString();
// Sanity check
if (rdf == null || rdf.trim().isEmpty()) continue;
String keyUri = rdfGen.normalizeUri(rdfGen.getTemplateTermSetPopulatedWithValues(node.getColumnValues()
, trMap.getSubject().getTemplate()));
rowToUriMap.put(row, keyUri);
// Check if the macthes already exist in the triple store
if (checkTripleStoreIfMatchAlreadyExists(keyUri)) {
System.out.println("Match already exists!");
outRdf.close();
pw.close();
count++;
continue;
}
// Invoke the linking service if no match exists in the triple store
String serviceInput = rdf.replaceAll('<' + keyUri + '>', "?x");
String res = invokeReconcilitaionService(serviceInput);
if (res == null || res.isEmpty()) {
System.out.println("No linking output for " + serviceInput);
continue;
}
// Insert the subject uri inside the service output
int triplesStartIndex = res.indexOf("[");
if (triplesStartIndex != -1) {
String finalRdfOutput = res.substring(0, triplesStartIndex) +
"<" + keyUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> " +
res.substring(triplesStartIndex);
HTTPUtil.executeHTTPPostRequest(TripleStoreUtil.defaultDataRepoUrl + "/statements", "text/n3",
"", finalRdfOutput);
}
outRdf.close();
pw.close();
count++;
}
// Add a column at the same level as key column
HNode linkingHNode = hNode.getHTable(f).addNewHNodeAfter(hNode.getId(), f,
"LinkingMatches", worksheet, true);
// Add a nested table inside the linkingHNode
HTable linkingNestedTable = linkingHNode.addNestedTable("Matches", worksheet, f);
HNode entityColHNode = linkingNestedTable.addHNode("Entity", worksheet, f);
HNode scoreColHNode = linkingNestedTable.addNewHNodeAfter(entityColHNode.getId(),
f, "Score", worksheet, true);
// For each row, query the triple store to get the possible matches
for (Row row:rowToUriMap.keySet()) {
String subjUri = rowToUriMap.get(row);
// Query the triple store to get a list of matches
String query = "PREFIX d:<http://entities.restdesc.org/terms#> " +
"SELECT ?entity ?score WHERE " +
"{ <" + subjUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> ?x ." +
" ?x d:possibleMatch ?match . " +
" ?match d:entity ?entity . " +
" ?match d:similarity ?score . " +
"} ORDER BY DESC(?score)";
String sData = TripleStoreUtil.invokeSparqlQuery(query, TripleStoreUtil.defaultDataRepoUrl, "application/sparql-results+json", null);
if (sData == null | sData.isEmpty()) {
System.out.println("Empty response object from query : " + query);
}
JSONObject queryRes = new JSONObject(sData);
if (queryRes != null) {
Table linkingDataTable = row.getNode(linkingHNode.getId()).getNestedTable();
JSONArray bindings = queryRes.getJSONObject("results").getJSONArray("bindings");
if (bindings == null || bindings.length() == 0) continue;
for (int i=0; i<bindings.length(); i++) {
JSONObject binding = bindings.getJSONObject(i);
Row r1 = linkingDataTable.addRow(f);
String score = binding.getJSONObject("score").getString("value");
if (score.length() > 5) {
score = score.substring(0, 4);
}
r1.setValue(entityColHNode.getId(), binding.getJSONObject("entity").getString("value"), f);
r1.setValue(scoreColHNode.getId(), score, f);
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
// Prepare the output container
UpdateContainer c = new UpdateContainer();
vWorkspace.getViewFactory().updateWorksheet(vWorksheetId, worksheet,worksheet.getHeaders().getAllPaths(), vWorkspace);
vWorkspace.getViewFactory().getVWorksheet(this.vWorksheetId).update(c);
/** Add the alignment update **/
addAlignmentUpdate(c, vWorkspace, worksheet);
c.add(new InfoUpdate("Linking complete"));
return c;
}
// private void filterTripleMapsAndAuxillaryInformation() {
//
// }
private boolean checkTripleStoreIfMatchAlreadyExists(String keyUri)
throws ClientProtocolException, IOException, JSONException {
// Query the triple store to get a list of matches
String query = "PREFIX d:<http://entities.restdesc.org/terms#> " +
"SELECT ?match WHERE " +
"{ <" + keyUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> ?x ." +
" ?x d:possibleMatch ?match . " +
"}";
String sData = TripleStoreUtil.invokeSparqlQuery(query, TripleStoreUtil.defaultDataRepoUrl, "application/sparql-results+json", null);
if (sData == null | sData.isEmpty()) {
System.out.println("Empty response object from query : " + query);
}
JSONObject queryRes = new JSONObject(sData);
if (queryRes != null
&& queryRes.getJSONObject("results") != null
&& queryRes.getJSONObject("results").getJSONArray("bindings") != null
&& queryRes.getJSONObject("results").getJSONArray("bindings").length() != 0) {
return true;
}
return false;
}
private String invokeReconcilitaionService(String serviceInput) {
try {
String output = HTTPUtil.executeHTTPPostRequest(reconciliationServiceUrl, "text/n3",
null, serviceInput);
return output;
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
private String getKeyColumnHNodeIdForAlignmentNode(Alignment alignment) {
for (Link outgoingLink:alignment.getCurrentOutgoingLinksToNode(alignmentNodeId)) {
// Column contains uris for the internal node
if (outgoingLink instanceof ClassInstanceLink
&& (outgoingLink.getKeyType() == LinkKeyInfo.UriOfInstance
|| outgoingLink.getKeyType() == LinkKeyInfo.PartOfKey)) {
if (outgoingLink.getTarget() instanceof ColumnNode) {
return ((ColumnNode) outgoingLink.getTarget()).getHNodeId();
}
}
// Column link is a data property marked as key
if (outgoingLink instanceof DataPropertyLink
&& outgoingLink.getKeyType() == LinkKeyInfo.PartOfKey) {
if (outgoingLink.getTarget() instanceof ColumnNode) {
return ((ColumnNode) outgoingLink.getTarget()).getHNodeId();
}
}
}
return null;
}
@Override
public UpdateContainer undoIt(VWorkspace vWorkspace) {
return null;
}
private void fetchRdfPrefixAndNamespaceFromPreferences(VWorkspace vWorkspace) {
//get the rdf prefix from the preferences
ViewPreferences prefs = vWorkspace.getPreferences();
JSONObject prefObject = prefs.getCommandPreferencesJSONObject("PublishRDFCommandPreferences");
this.rdfNamespace = "http://localhost/source/";
this.rdfPrefix = "s";
if(prefObject!=null){
this.rdfPrefix = prefObject.optString("rdfPrefix");
this.rdfNamespace = prefObject.optString("rdfNamespace");
}
if(rdfPrefix==null || rdfPrefix.trim().isEmpty()) {
this.rdfPrefix = "http://localhost/source/";
}
}
private void addAlignmentUpdate(UpdateContainer c, VWorkspace vWorkspace, Worksheet worksheet) {
String alignmentId = AlignmentManager.Instance().constructAlignmentId(
vWorkspace.getWorkspace().getId(), vWorksheetId);
Alignment alignment = AlignmentManager.Instance().getAlignment(alignmentId);
if (alignment == null) {
alignment = new Alignment(vWorkspace.getWorkspace().getOntologyManager());
AlignmentManager.Instance().addAlignmentToMap(alignmentId, alignment);
}
// Compute the semantic type suggestions
SemanticTypeUtil.computeSemanticTypesSuggestion(worksheet, vWorkspace.getWorkspace()
.getCrfModelHandler(), vWorkspace.getWorkspace().getOntologyManager(), alignment);
c.add(new SemanticTypesUpdate(worksheet, vWorksheetId, alignment));
c.add(new SVGAlignmentUpdate_ForceKarmaLayout(vWorkspace.getViewFactory().
getVWorksheet(vWorksheetId), alignment));
}
}