package org.deri.grefine.reconcile.rdf.factories; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; import org.deri.grefine.reconcile.model.ReconciliationCandidate; import org.deri.grefine.reconcile.model.SearchResultItem; import org.deri.grefine.reconcile.util.ResultSetWrappingUtil; import org.deri.grefine.reconcile.util.StringUtils; import com.google.common.collect.ImmutableList; import com.google.common.collect.Multimap; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Literal; /** * @author fadmaa * provides a default implementation of {@link org.deri.grefine.reconcile.rdf.factories.SparqlQueryFactory} * this implementation takes care of the well-defined parts of the queries a.k.a standardised while leave the rest to actual implementation through * abstract methods. * */ public abstract class AbstractSparqlQueryFactory implements SparqlQueryFactory{ @Override public String getSampleInstancesSparqlQuery(String typeUri, ImmutableList<String> searchPropertyUris, int limit){ String labelClause = getLabelClause(searchPropertyUris.size()); StringBuilder labelPropertyClause = new StringBuilder(); int i=1; for(String propUri:searchPropertyUris){ labelPropertyClause.append(LABEL_PROPERTY_FILTER.replace("[[PROPERTY_URI]]", propUri).replace("[[INDEX]]", String.valueOf(i))); i++; } return SAMPLE_INSTANCES_QUERY.replace("[[TYPE_URI]]", typeUri) .replace("[[LABEL_CLAUSE]]",labelClause) .replace("[[LABEL_PROPERTY_CLAUSE]]", labelPropertyClause) .replace("[[LIMIT]]", String.valueOf(limit)); } @Override public String getSampleValuesOfPropertySparqlQuery(String propertyUri, int limit){ return SAMPLE_PROPERTY_INSTANCES_QUERY.replace("[[PROPERTY_URI]]", propertyUri).replace("[[LIMIT]]", String.valueOf(limit)); } @Override public String getResourcePropertiesMapSparqlQuery(String resourceId, int limit) { return RESOURCE_PROPERTIES_QUERY.replace("[[RESOURCE]]", resourceId).replace("[[LIMIT]]", String.valueOf(limit)); } @Override public ImmutableList<SearchResultItem> wrapTypeSuggestResultSet(ResultSet resultSet, String prefix, int limit) { return ResultSetWrappingUtil.resultSetToSearchResultListFilterDuplicates(resultSet, limit); } @Override public ImmutableList<SearchResultItem> wrapPropertySuggestResultSet(ResultSet resultSet, String prefix, int limit) { return ResultSetWrappingUtil.resultSetToSearchResultListFilterDuplicates(resultSet, limit); } @Override public ImmutableList<SearchResultItem> wrapEntitySearchResultSet(ResultSet resultSet, int limit) { return ResultSetWrappingUtil.resultSetToSearchResultListFilterDuplicates(resultSet, limit); } @Override public ImmutableList<SearchResultItem> wrapSampleInstancesResultSet(ResultSet resultSet, String typeId,ImmutableList<String> searchPropertyUris, int limit) { List<SearchResultItem> results = new ArrayList<SearchResultItem>(); while(resultSet.hasNext()){ QuerySolution sol = resultSet.next(); String id = sol.getResource("entity").getURI(); String name = getFirstNonNullLabel(sol,searchPropertyUris); double score = 0; results.add(new SearchResultItem(id, name, score)); } return ImmutableList.copyOf(results); } private String getFirstNonNullLabel(QuerySolution sol, ImmutableList<String> searchPropertyUris) { for(int i=1;i<=searchPropertyUris.size();i++){ Literal l = sol.getLiteral("label" + i); if(l!=null){ return l.getString(); } } return ""; } @Override public ImmutableList<String[]> wrapSampleValuesOfPropertyResultSet(ResultSet resultSet, String propertyUri, int limit) { return ResultSetWrappingUtil.resultSetToListOfPairs(resultSet); } @Override public Multimap<String, String> wrapResourcePropertiesMapResultSet(ResultSet resultSet, String resourceId, int limit) { return ResultSetWrappingUtil.resultSetToMultimap(resultSet); } @Override public String getTypesOfEntitiesQuery(ImmutableList<String> entityUris) { String entityEqualityFilter = StringUtils.join(entityUris, "> || ", "?entity=<", "", ">"); return TYPES_OF_ENTITIES_QUERY.replace("[[ENTITY_EQUALITY_FILTER]]", entityEqualityFilter); } @Override public Multimap<String, String> wrapTypesOfEntities(ResultSet resultSet) { return ResultSetWrappingUtil.resultSetToMultimap(resultSet); } @Override public String getResourcePropertiesMapSparqlQuery(PreviewResourceCannedQuery cannedQuery, String resourceId) { return cannedQuery.getPreviewQueryForResource(resourceId); } @Override public Multimap<String, String> wrapResourcePropertiesMapResultSet(PreviewResourceCannedQuery cannedQuery, ResultSet resultset) { return cannedQuery.wrapResourcePropertiesMapResultSet(resultset); } /** * put the ResultSet returned from SPARQL endpoint into the {@link org.deri.grefine.reconciliation.model.GRefineReconciliationResponse GRefineReconciliationResponse} <br/> * @param result * @return */ @Override public List<ReconciliationCandidate> wrapReconciliationResultset(ResultSet result, String queryString, ImmutableList<String> searchPropertyUris, int limit, double matchThreshold){ List<ReconciliationCandidate> candidates = new ArrayList<ReconciliationCandidate>(); Set<String> seen = new HashSet<String>(); boolean match = false; boolean moreThanOneMatchFound = false; double maxScore = 0.0; boolean first = true; while(result.hasNext()){ QuerySolution solution = result.nextSolution(); String entityUri = solution.getResource("entity").getURI(); if(seen.contains(entityUri)){ //already seen continue; } seen.add(entityUri); String label = solution.getLiteral("label").getString(); //score returned by Lucene is only meaningful to compare results of the *same* query //they cannot be used as percentage see: http://wiki.apache.org/lucene-java/ScoresAsPercentages //they are used to weight the edit distance Literal scoreWieghtLiteral = solution.getLiteral("score1"); if(scoreWieghtLiteral!=null && first){ first = false; maxScore = scoreWieghtLiteral.getDouble(); } double scoreWeight = scoreWieghtLiteral == null? 1 : scoreWieghtLiteral.getDouble()/maxScore; double score = scoreWeight * StringUtils.getLevenshteinScore(label, queryString); if(score>=matchThreshold){ if(match){ moreThanOneMatchFound = true; }else{ match = true; } } candidates.add(new ReconciliationCandidate(entityUri, label, new String[] {}, score, match)); if(candidates.size()==limit){ //we got enough break; } } if(moreThanOneMatchFound){ //set all matches to false for(ReconciliationCandidate candidate:candidates){ candidate.setMatch(false); } } Collections.sort(candidates,new Comparator<ReconciliationCandidate>() { @Override public int compare(ReconciliationCandidate o1,ReconciliationCandidate o2) { //discendingly return Double.compare(o2.getScore(), o1.getScore()); } }); return candidates; } private String getLabelClause(int num){ StringBuilder labelClause = new StringBuilder(); for(int i=1;i<=num;i++){ labelClause.append(LABEL).append(i); } return labelClause.toString(); } private static final String SAMPLE_INSTANCES_QUERY = "SELECT DISTINCT ?entity [[LABEL_CLAUSE]] " + "WHERE{" + "?entity a <[[TYPE_URI]]>. " + "[[LABEL_PROPERTY_CLAUSE]]" + "}LIMIT [[LIMIT]]"; private static final String SAMPLE_PROPERTY_INSTANCES_QUERY = "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " + "SELECT DISTINCT ?s ?o " + "WHERE{" + "?s <[[PROPERTY_URI]]> ?o. " + "}LIMIT [[LIMIT]]"; private static final String RESOURCE_PROPERTIES_QUERY = "SELECT DISTINCT ?p ?v " + "WHERE{ " + "<[[RESOURCE]]> ?p ?v. " + "}LIMIT [[LIMIT]]"; private static final String TYPES_OF_ENTITIES_QUERY = "SELECT ?entity ?type " + "WHERE{ " + "?entity a ?type. " + "FILTER ([[ENTITY_EQUALITY_FILTER]]). " + "}"; private static final String LABEL = " ?label"; private static final String LABEL_PROPERTY_FILTER = "OPTIONAL {?entity <[[PROPERTY_URI]]> ?label[[INDEX]]} "; }