package services;

import helpers.SCHEMA;
import models.Commit;
import models.GraphHistory;
import models.Record;
import models.Resource;
import models.TripleCommit;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QueryParseException;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.shared.Lock;
import org.apache.jena.vocabulary.RDF;
import play.Logger;
import services.repository.Writable;

import java.io.IOException;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Determines which resources of a triple store must be (re-)indexed and writes
 * them, together with metadata derived from the graph history, to a target
 * repository.
 *
 * <p>Only {@link #getScope(Set)} reads the model inside a
 * {@code Lock.READ} critical section; the other methods access the model
 * without taking a lock themselves.
 *
 * Created by fo on 23.03.16.
 */
public class ResourceIndexer {

  Model mDb;
  Writable mTargetRepo;
  GraphHistory mGraphHistory;

  private static final String GLOBAL_QUERY_TEMPLATE =
    "SELECT DISTINCT ?s WHERE {" +
    " ?s a []" +
    "}";

  private static final String SCOPE_QUERY_TEMPLATE =
    "SELECT DISTINCT ?s1 WHERE {" +
    " ?s1 ?p1 <%1$s> ." +
    "}";

  // TODO: evaluate if there are other properties to exclude from triggering indexing
  private final List<RDFNode> mDoNotTrigger = Arrays.<RDFNode>asList(RDF.type, SCHEMA.about);

  /**
   * @param aDb The model that is queried for resources
   * @param aTargetRepo The repository that indexed resources are written to
   * @param aGraphHistory The history used to derive author and date metadata;
   *                      may be {@code null}, in which case no such metadata is added
   */
  public ResourceIndexer(Model aDb, Writable aTargetRepo, GraphHistory aGraphHistory) {
    this.mDb = aDb;
    this.mTargetRepo = aTargetRepo;
    this.mGraphHistory = aGraphHistory;
  }

  /**
   * Extracts resources that need to be indexed from a triple diff
   * @param aDiff The diff from which to extract resources
   * @return The list of resources touched by the diff, plus the resources
   *         referencing them; empty if the diff has no lines
   */
  public Set<String> getScope(Commit.Diff aDiff) {

    Set<String> commitScope = new HashSet<>();

    if (aDiff.getLines().isEmpty()) {
      return commitScope;
    }

    for (Commit.Diff.Line line : aDiff.getLines()) {
      // Cast once per line instead of once per accessed statement part.
      TripleCommit.Diff.Line tripleLine = (TripleCommit.Diff.Line) line;
      Property property = tripleLine.stmt.getPredicate();
      if (mDoNotTrigger.contains(property)) {
        continue;
      }
      RDFNode subject = tripleLine.stmt.getSubject();
      RDFNode object = tripleLine.stmt.getObject();
      if (subject.isURIResource()) {
        commitScope.add(subject.toString());
      }
      if (object.isURIResource()) {
        commitScope.add(object.toString());
      }
    }

    Set<String> indexScope = new HashSet<>(commitScope);
    indexScope.addAll(getScope(commitScope));

    Logger.debug("Indexing scope is " + indexScope);
    return indexScope;

  }

  /**
   * Queries the triple store for related resources that must also be indexed.
   * The lookups run inside a read critical section on the model.
   * @param aIds The list of resources for which to find related resources
   * @return The list of related resources
   */
  public Set<String> getScope(Set<String> aIds) {

    Set<String> indexScope = new HashSet<>();
    mDb.enterCriticalSection(Lock.READ);
    try {
      for (String id : aIds) {
        indexScope.addAll(getScope(id));
      }
    } finally {
      mDb.leaveCriticalSection();
    }
    return indexScope;

  }

  /**
   * Queries the triple store for related resources that must also be indexed
   * @param aId A resource for which to find related resources
   * @return The list of related resources, i.e. the subjects of all statements
   *         that have the given resource as their object; empty if the query
   *         could not be parsed
   */
  public Set<String> getScope(String aId) {

    // NOTE: aId is interpolated verbatim between angle brackets; an id that is
    // not a well-formed IRI yields a query that fails to parse (logged below)
    // or a different query than intended.
    String query = String.format(SCOPE_QUERY_TEMPLATE, aId);
    return select(query, "s1", false);

  }

  /**
   * Queries the triple store for all resources to be indexed
   * @return The list of typed resources that are identified by a URI
   */
  public Set<String> getScope() {

    Set<String> indexScope = select(GLOBAL_QUERY_TEMPLATE, "s", true);
    Logger.debug("Indexing scope" + indexScope.toString());
    return indexScope;

  }

  /**
   * Runs a SELECT query against the model and collects the bindings of one variable.
   * @param aQuery The SPARQL query to execute
   * @param aVariable The name of the result variable to collect
   * @param aUrisOnly Whether bindings that are not URI resources are skipped
   * @return The string forms of the collected bindings; empty if the query
   *         could not be parsed
   */
  private Set<String> select(String aQuery, String aVariable, boolean aUrisOnly) {

    Set<String> bindings = new HashSet<>();
    try (QueryExecution queryExecution =
           QueryExecutionFactory.create(QueryFactory.create(aQuery), mDb)) {
      ResultSet rs = queryExecution.execSelect();
      while (rs.hasNext()) {
        QuerySolution qs = rs.next();
        if (!qs.contains(aVariable)) {
          continue;
        }
        RDFNode node = qs.get(aVariable);
        if (!aUrisOnly || node.isURIResource()) {
          bindings.add(node.toString());
        }
      }
    } catch (QueryParseException e) {
      Logger.error("Failed to execute query " + aQuery, e);
    }
    return bindings;

  }

  /**
   * Builds the resource with the given id from the model.
   * @param aId The id of the resource to build
   * @return The resource, or {@code null} if it could not be created
   */
  public Resource getResource(String aId) {

    try {
      return ResourceFramer.resourceFromModel(mDb, aId);
    } catch (IOException e) {
      Logger.error("Could not create resource from model", e);
      return null;
    }

  }

  /**
   * Builds the resources that reference the given resource.
   * @param aId The id of the referenced resource
   * @return The resources in the scope of the id; ids for which no resource
   *         could be created are left out
   */
  public Set<Resource> getResources(String aId) {
    return frameAll(this.getScope(aId));
  }

  /**
   * Builds the resources affected by a diff.
   * @param aDiff The diff from which to determine the affected resources
   * @return The resources in the scope of the diff; ids for which no resource
   *         could be created are left out
   */
  public Set<Resource> getResources(Commit.Diff aDiff) {
    return frameAll(this.getScope(aDiff));
  }

  /**
   * Builds all typed resources of the model.
   * @return The resources in the global scope; ids for which no resource
   *         could be created are left out
   */
  public Set<Resource> getResources() {
    return frameAll(this.getScope());
  }

  /**
   * Builds a resource for each id, skipping ids for which none could be created.
   * @param aIds The ids of the resources to build
   * @return The successfully built resources; never contains {@code null}
   */
  private Set<Resource> frameAll(Set<String> aIds) {

    Set<Resource> resources = new HashSet<>();
    for (String id : aIds) {
      Resource resource = getResource(id);
      if (resource != null) {
        resources.add(resource);
      }
    }
    return resources;

  }

  /**
   * Writes a resource to the target repository. If a graph history is
   * available, contributor, author, modification and creation date are taken
   * from the first and last entry of the resource's history. Resources
   * without an id are ignored.
   * @param aResource The resource to index
   */
  public void index(Resource aResource) {

    if (!aResource.hasId()) {
      return;
    }

    try {
      Map<String, String> metadata = new HashMap<>();
      if (mGraphHistory != null) {
        List<Commit> history = mGraphHistory.log(aResource.getId());
        // An empty history makes get(0) throw IndexOutOfBoundsException, which
        // is caught below: the failure is logged and the resource is not indexed.
        Commit firstEntry = history.get(0);
        Commit lastEntry = history.get(history.size() - 1);
        metadata.put(Record.CONTRIBUTOR, firstEntry.getHeader().getAuthor());
        metadata.put(Record.AUTHOR, lastEntry.getHeader().getAuthor());
        metadata.put(Record.DATE_MODIFIED, firstEntry.getHeader().getTimestamp()
          .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME));
        metadata.put(Record.DATE_CREATED, lastEntry.getHeader().getTimestamp()
          .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME));
      }
      metadata.put(Record.LINK_COUNT, String.valueOf(aResource.getNumberOfSubFields("**.@id")));
      mTargetRepo.addResource(aResource, metadata);
    } catch (IndexOutOfBoundsException | IOException e) {
      Logger.error("Could not index resource", e);
    }

  }

  /**
   * Indexes every non-null resource of the set and logs the elapsed time in
   * whole seconds.
   * @param aResources The resources to index; {@code null} elements are skipped
   */
  public void index(Set<Resource> aResources) {

    long startTime = System.nanoTime();
    for (Resource resource : aResources) {
      if (resource != null) {
        index(resource);
      }
    }
    long duration = (System.nanoTime() - startTime) / 1_000_000_000L;
    Logger.debug("Done indexing, took " + duration + " sec.");

  }

  /**
   * Indexes all resources affected by a diff.
   * @param aDiff The diff from which to determine the resources to index
   */
  public void index(Commit.Diff aDiff) {
    index(getResources(aDiff));
  }

  /**
   * Indexes the resources referencing the given resource, or all typed
   * resources of the model if the id is {@code "*"}.
   * @param aId The id of the referenced resource, or {@code "*"} for everything
   */
  public void index(String aId) {

    Set<Resource> denormalizedResources = aId.equals("*")
      ? getResources()
      : getResources(aId);
    index(denormalizedResources);

  }

}