/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.controller.command.reconciliation; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import org.apache.http.client.ClientProtocolException; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import edu.isi.karma.controller.command.Command; import edu.isi.karma.controller.command.CommandException; import edu.isi.karma.controller.update.ErrorUpdate; import edu.isi.karma.controller.update.InfoUpdate; import edu.isi.karma.controller.update.SVGAlignmentUpdate_ForceKarmaLayout; import edu.isi.karma.controller.update.SemanticTypesUpdate; import edu.isi.karma.controller.update.UpdateContainer; import edu.isi.karma.er.helper.TripleStoreUtil; import edu.isi.karma.kr2rml.ErrorReport; import edu.isi.karma.kr2rml.KR2RMLMappingGenerator; import edu.isi.karma.kr2rml.KR2RMLWorksheetRDFGenerator; import edu.isi.karma.kr2rml.ReportMessage; import edu.isi.karma.kr2rml.TriplesMap; import edu.isi.karma.modeling.Uris; import edu.isi.karma.modeling.alignment.Alignment; import edu.isi.karma.modeling.alignment.AlignmentManager; import edu.isi.karma.modeling.semantictypes.SemanticTypeUtil; import edu.isi.karma.rep.HNode; import edu.isi.karma.rep.HNodePath; import edu.isi.karma.rep.HTable; import edu.isi.karma.rep.Node; import edu.isi.karma.rep.RepFactory; import edu.isi.karma.rep.Row; import edu.isi.karma.rep.Table; import edu.isi.karma.rep.Worksheet; import edu.isi.karma.rep.alignment.ClassInstanceLink; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.DataPropertyLink; import edu.isi.karma.rep.alignment.Link; import edu.isi.karma.rep.alignment.LinkKeyInfo; import edu.isi.karma.util.HTTPUtil; import edu.isi.karma.view.VWorkspace; import edu.isi.karma.view.ViewPreferences; public class InvokeRubenReconciliationService extends Command { private final String alignmentNodeId; private final String vWorksheetId; private String rdfPrefix; private String rdfNamespace; private final String reconciliationServiceUrl = "http://entities.restdesc.org/disambiguations/"; public InvokeRubenReconciliationService(String id, String alignmentNodeId, String vWorksheetId) { super(id); this.alignmentNodeId = alignmentNodeId; this.vWorksheetId = vWorksheetId; addTag(CommandTag.Transformation); } @Override public String getCommandName() { return this.getClass().getSimpleName(); } @Override public String getTitle() { return "Invoke Reconciliation"; } @Override public String getDescription() { return ""; } @Override public CommandType getCommandType() { return CommandType.notUndoable; } @Override public UpdateContainer doIt(VWorkspace vWorkspace) throws CommandException { RepFactory f = vWorkspace.getRepFactory(); Worksheet worksheet = vWorkspace.getViewFactory().getVWorksheet(vWorksheetId).getWorksheet(); Alignment alignment = AlignmentManager.Instance().getAlignment( AlignmentManager.Instance().constructAlignmentId(vWorkspace.getWorkspace().getId(), vWorksheetId)); // Set the prefix and namespace to be used while generating RDF fetchRdfPrefixAndNamespaceFromPreferences(vWorkspace); // Generate the KR2RML data structures for the RDF generation final ErrorReport errorReport = new ErrorReport(); KR2RMLMappingGenerator mappingGen = new KR2RMLMappingGenerator( vWorkspace.getWorkspace().getOntologyManager(), alignment, worksheet.getSemanticTypes(), rdfPrefix, rdfNamespace, true, errorReport); TriplesMap trMap = mappingGen.getTriplesMapForNodeId(alignmentNodeId); // Remove the triple maps and info that we don't need // filterTripleMapsAndAuxillaryInformation(); // Get the column that contains the key for the internal node String keyColumnHNodeId = getKeyColumnHNodeIdForAlignmentNode(alignment); if (keyColumnHNodeId == null) { return new UpdateContainer(new ErrorUpdate("Please assign a column as a key for the class")); } // Loop through each row that contains the column containing key HNode hNode = f.getHNode(keyColumnHNodeId); HNodePath path = hNode.getHNodePath(f); Collection<Node> nodes = new ArrayList<Node>(); worksheet.getDataTable().collectNodes(path, nodes); Map<Row, String> rowToUriMap = new HashMap<Row, String>(); // For through each row, generate the RDF, and invoke the service try { int count = 1; for (Node node:nodes) { if (count % 5 ==0) { System.out.println("Done invoking linking service for " + count + " rows"); } Row row = node.getBelongsToRow(); // Generate the RDF StringWriter outRdf = new StringWriter(); PrintWriter pw = new PrintWriter(outRdf); KR2RMLWorksheetRDFGenerator rdfGen = new KR2RMLWorksheetRDFGenerator(worksheet, vWorkspace.getRepFactory(), vWorkspace.getWorkspace().getOntologyManager(), pw, mappingGen.getMappingAuxillaryInformation(), errorReport, false); rdfGen.generateTriplesForRow(row, new HashSet<String>(), new HashSet<String>(), new HashMap<String, ReportMessage>(), new HashSet<String>()); pw.flush(); String rdf = outRdf.toString(); // Sanity check if (rdf == null || rdf.trim().isEmpty()) continue; String keyUri = rdfGen.normalizeUri(rdfGen.getTemplateTermSetPopulatedWithValues(node.getColumnValues() , trMap.getSubject().getTemplate())); rowToUriMap.put(row, keyUri); // Check if the macthes already exist in the triple store if (checkTripleStoreIfMatchAlreadyExists(keyUri)) { System.out.println("Match already exists!"); outRdf.close(); pw.close(); count++; continue; } // Invoke the linking service if no match exists in the triple store String serviceInput = rdf.replaceAll('<' + keyUri + '>', "?x"); String res = invokeReconcilitaionService(serviceInput); if (res == null || res.isEmpty()) { System.out.println("No linking output for " + serviceInput); continue; } // Insert the subject uri inside the service output int triplesStartIndex = res.indexOf("["); if (triplesStartIndex != -1) { String finalRdfOutput = res.substring(0, triplesStartIndex) + "<" + keyUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> " + res.substring(triplesStartIndex); HTTPUtil.executeHTTPPostRequest(TripleStoreUtil.defaultDataRepoUrl + "/statements", "text/n3", "", finalRdfOutput); } outRdf.close(); pw.close(); count++; } // Add a column at the same level as key column HNode linkingHNode = hNode.getHTable(f).addNewHNodeAfter(hNode.getId(), f, "LinkingMatches", worksheet, true); // Add a nested table inside the linkingHNode HTable linkingNestedTable = linkingHNode.addNestedTable("Matches", worksheet, f); HNode entityColHNode = linkingNestedTable.addHNode("Entity", worksheet, f); HNode scoreColHNode = linkingNestedTable.addNewHNodeAfter(entityColHNode.getId(), f, "Score", worksheet, true); // For each row, query the triple store to get the possible matches for (Row row:rowToUriMap.keySet()) { String subjUri = rowToUriMap.get(row); // Query the triple store to get a list of matches String query = "PREFIX d:<http://entities.restdesc.org/terms#> " + "SELECT ?entity ?score WHERE " + "{ <" + subjUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> ?x ." + " ?x d:possibleMatch ?match . " + " ?match d:entity ?entity . " + " ?match d:similarity ?score . " + "} ORDER BY DESC(?score)"; String sData = TripleStoreUtil.invokeSparqlQuery(query, TripleStoreUtil.defaultDataRepoUrl, "application/sparql-results+json", null); if (sData == null | sData.isEmpty()) { System.out.println("Empty response object from query : " + query); } JSONObject queryRes = new JSONObject(sData); if (queryRes != null) { Table linkingDataTable = row.getNode(linkingHNode.getId()).getNestedTable(); JSONArray bindings = queryRes.getJSONObject("results").getJSONArray("bindings"); if (bindings == null || bindings.length() == 0) continue; for (int i=0; i<bindings.length(); i++) { JSONObject binding = bindings.getJSONObject(i); Row r1 = linkingDataTable.addRow(f); String score = binding.getJSONObject("score").getString("value"); if (score.length() > 5) { score = score.substring(0, 4); } r1.setValue(entityColHNode.getId(), binding.getJSONObject("entity").getString("value"), f); r1.setValue(scoreColHNode.getId(), score, f); } } } } catch (Exception e) { e.printStackTrace(); } // Prepare the output container UpdateContainer c = new UpdateContainer(); vWorkspace.getViewFactory().updateWorksheet(vWorksheetId, worksheet,worksheet.getHeaders().getAllPaths(), vWorkspace); vWorkspace.getViewFactory().getVWorksheet(this.vWorksheetId).update(c); /** Add the alignment update **/ addAlignmentUpdate(c, vWorkspace, worksheet); c.add(new InfoUpdate("Linking complete")); return c; } // private void filterTripleMapsAndAuxillaryInformation() { // // } private boolean checkTripleStoreIfMatchAlreadyExists(String keyUri) throws ClientProtocolException, IOException, JSONException { // Query the triple store to get a list of matches String query = "PREFIX d:<http://entities.restdesc.org/terms#> " + "SELECT ?match WHERE " + "{ <" + keyUri + "> <" + Uris.KM_LINKING_MATCHES_URI + "> ?x ." + " ?x d:possibleMatch ?match . " + "}"; String sData = TripleStoreUtil.invokeSparqlQuery(query, TripleStoreUtil.defaultDataRepoUrl, "application/sparql-results+json", null); if (sData == null | sData.isEmpty()) { System.out.println("Empty response object from query : " + query); } JSONObject queryRes = new JSONObject(sData); if (queryRes != null && queryRes.getJSONObject("results") != null && queryRes.getJSONObject("results").getJSONArray("bindings") != null && queryRes.getJSONObject("results").getJSONArray("bindings").length() != 0) { return true; } return false; } private String invokeReconcilitaionService(String serviceInput) { try { String output = HTTPUtil.executeHTTPPostRequest(reconciliationServiceUrl, "text/n3", null, serviceInput); return output; } catch (Exception e) { e.printStackTrace(); } return null; } private String getKeyColumnHNodeIdForAlignmentNode(Alignment alignment) { for (Link outgoingLink:alignment.getCurrentOutgoingLinksToNode(alignmentNodeId)) { // Column contains uris for the internal node if (outgoingLink instanceof ClassInstanceLink && (outgoingLink.getKeyType() == LinkKeyInfo.UriOfInstance || outgoingLink.getKeyType() == LinkKeyInfo.PartOfKey)) { if (outgoingLink.getTarget() instanceof ColumnNode) { return ((ColumnNode) outgoingLink.getTarget()).getHNodeId(); } } // Column link is a data property marked as key if (outgoingLink instanceof DataPropertyLink && outgoingLink.getKeyType() == LinkKeyInfo.PartOfKey) { if (outgoingLink.getTarget() instanceof ColumnNode) { return ((ColumnNode) outgoingLink.getTarget()).getHNodeId(); } } } return null; } @Override public UpdateContainer undoIt(VWorkspace vWorkspace) { return null; } private void fetchRdfPrefixAndNamespaceFromPreferences(VWorkspace vWorkspace) { //get the rdf prefix from the preferences ViewPreferences prefs = vWorkspace.getPreferences(); JSONObject prefObject = prefs.getCommandPreferencesJSONObject("PublishRDFCommandPreferences"); this.rdfNamespace = "http://localhost/source/"; this.rdfPrefix = "s"; if(prefObject!=null){ this.rdfPrefix = prefObject.optString("rdfPrefix"); this.rdfNamespace = prefObject.optString("rdfNamespace"); } if(rdfPrefix==null || rdfPrefix.trim().isEmpty()) { this.rdfPrefix = "http://localhost/source/"; } } private void addAlignmentUpdate(UpdateContainer c, VWorkspace vWorkspace, Worksheet worksheet) { String alignmentId = AlignmentManager.Instance().constructAlignmentId( vWorkspace.getWorkspace().getId(), vWorksheetId); Alignment alignment = AlignmentManager.Instance().getAlignment(alignmentId); if (alignment == null) { alignment = new Alignment(vWorkspace.getWorkspace().getOntologyManager()); AlignmentManager.Instance().addAlignmentToMap(alignmentId, alignment); } // Compute the semantic type suggestions SemanticTypeUtil.computeSemanticTypesSuggestion(worksheet, vWorkspace.getWorkspace() .getCrfModelHandler(), vWorkspace.getWorkspace().getOntologyManager(), alignment); c.add(new SemanticTypesUpdate(worksheet, vWorksheetId, alignment)); c.add(new SVGAlignmentUpdate_ForceKarmaLayout(vWorkspace.getViewFactory(). getVWorksheet(vWorksheetId), alignment)); } }