package org.nextprot.api.rdf.service.impl;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.rdf.domain.RdfConstants;
import org.nextprot.api.rdf.domain.RdfTypeInfo;
import org.nextprot.api.rdf.domain.TripleInfo;
import org.nextprot.api.rdf.service.RdfHelpService;
import org.nextprot.api.rdf.service.SparqlService;
import org.nextprot.api.rdf.utils.RdfPrefixUtils;
import org.nextprot.api.rdf.utils.SparqlDictionary;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.RDFNode;
//@Lazy
@Service
public class RdfHelpServiceImpl implements RdfHelpService {
// Class-wide logger (also used by the nested FillRdfTypeInfoTask).
private static final Log LOGGER = LogFactory.getLog(RdfHelpServiceImpl.class);
// RDF types for which getRdfTypeFullInfo() requests every value (limit Integer.MAX_VALUE)
// instead of the default sample of 20.
private List<String> completeSetOfValuesForTypes = Arrays.asList(":Source", ":Database", ":SubcellularLocation", ":NextprotAnatomyCv");
// "<rdf type>/<predicate>" keys for which getRdfTypeFullInfo() requests every literal value
// (limit Integer.MAX_VALUE) instead of the default sample of 50.
// NOTE(review): the first entry has no leading ':' whereas the lookup key is built as
// rdfTypeName + "/" + predicate and the type names listed above all start with ':' --
// this entry probably never matches; confirm whether ":NextprotAnatomyCv/rdfs:label" was intended.
private List<String> completeSetOfValuesForLiteral = Arrays.asList("NextprotAnatomyCv/rdfs:label", ":SubcellularLocation/rdfs:comment");
// rdf:type names that getRdfTypesNames() drops from its result without logging.
private static final List<String> RDF_TYPES_TO_EXCLUDE = Arrays.asList(":childOf", "rdf:Property");
// Injected by Spring: source of the named SPARQL queries and of the prefix declarations.
private @Autowired SparqlDictionary sparqlDictionary = null;
// Injected by Spring: creates the QueryExecution objects used to run the SPARQL queries.
private @Autowired SparqlService sparqlService = null;
// Size of the thread pool used by getRdfTypeFullInfoList().
// Measurements recorded by the original author:
// with 10 threads, duration is 18 minutes but the result is incomplete (some triples are missing!)
// with 1 thread, duration is 57 minutes but the result is ok.
private final int NUMBER_THREADS = 1;
/**
 * Number of failures recorded while querying the triple store.
 * Held in an AtomicInteger so that worker threads can update it without
 * acquiring this service's monitor.
 */
private final AtomicInteger errorCount = new AtomicInteger();

/**
 * Records one failure.
 *
 * This method is deliberately NOT synchronized on this instance:
 * getRdfTypeFullInfoList() is a synchronized method that keeps the monitor of
 * this object while it waits for its worker threads, and those workers reach
 * this method through getTripleInfoList(). A synchronized increment would
 * therefore block the worker until the waiting method returns, i.e. deadlock.
 */
private void incrementErrors() {
    errorCount.incrementAndGet();
}
/**
 * Builds the description of every RDF type returned by getRdfTypesNames().
 *
 * Steps: (1) one FillRdfTypeInfoTask per type name is run on a pool of
 * NUMBER_THREADS threads; (2) each type is linked to the types owning a triple
 * whose object type is that type; (3) starting from ":Entry" (when present),
 * the predicate paths leading to each reachable type are recorded.
 *
 * A type whose task failed is logged, counted as an error and left out of the
 * returned list.
 *
 * @return the list of type descriptions (the result is cached under "rdfhelp")
 * @throws NextProtException if the calling thread is interrupted while waiting
 *         for the tasks; the interrupt flag is restored before throwing
 */
@Cacheable("rdfhelp")
@Override
public synchronized List<RdfTypeInfo> getRdfTypeFullInfoList() {
    long t0 = System.currentTimeMillis();

    Set<String> rdfTypesNames = getRdfTypesNames();
    // submittedNames.get(i) is the type name computed by rdfFutureTypes.get(i)
    List<String> submittedNames = new ArrayList<String>();
    List<Future<RdfTypeInfo>> rdfFutureTypes = new ArrayList<Future<RdfTypeInfo>>();
    ExecutorService executor = Executors.newFixedThreadPool(NUMBER_THREADS);
    try {
        for (String rdfTypeName : rdfTypesNames) {
            rdfFutureTypes.add(executor.submit(new FillRdfTypeInfoTask(this, rdfTypeName)));
            submittedNames.add(rdfTypeName);
        }
    } finally {
        // always stop accepting work so the pool threads can end, even if submit() failed
        executor.shutdown();
    }

    try {
        if (!executor.awaitTermination(1, TimeUnit.DAYS)) {
            // Future.get() below blocks until the remaining tasks are done
            LOGGER.warn("RDF type tasks still running after 1 day, waiting for the remaining results");
        }
    } catch (InterruptedException e) {
        executor.shutdownNow();
        Thread.currentThread().interrupt();
        LOGGER.error("Interrupted while waiting for RDF type tasks", e);
        throw new NextProtException(e.getLocalizedMessage());
    }

    List<RdfTypeInfo> rdfTypes = Collections.synchronizedList(new ArrayList<RdfTypeInfo>());
    for (int i = 0; i < rdfFutureTypes.size(); i++) {
        String rdfTypeName = submittedNames.get(i);
        try {
            rdfTypes.add(rdfFutureTypes.get(i).get());
        } catch (InterruptedException e) {
            // do not return (and cache) a partial list
            executor.shutdownNow();
            Thread.currentThread().interrupt();
            LOGGER.error("Interrupted while collecting result of " + rdfTypeName, e);
            throw new NextProtException(e.getLocalizedMessage());
        } catch (ExecutionException e) {
            incrementErrors();
            LOGGER.error("Could not build info of rdf:type " + rdfTypeName, e.getCause() != null ? e.getCause() : e);
        }
    }

    linkParentTypes(rdfTypes);

    Map<String, RdfTypeInfo> fullMap = new HashMap<String, RdfTypeInfo>();
    for (RdfTypeInfo rti : rdfTypes) {
        fullMap.put(rti.getTypeName(), rti);
    }
    if (fullMap.containsKey(":Entry")) {
        buildPathToOrigin(fullMap, fullMap.get(":Entry"), "?entry ", 0);
    }

    long seconds = (System.currentTimeMillis() - t0) / 1000;
    String duration = String.format("%d:%02d:%02d", seconds / 3600, (seconds % 3600) / 60, (seconds % 60)) + " [H:MM:SS]";
    LOGGER.info("errors: " + errorCount);
    LOGGER.info("duration: " + duration);
    return rdfTypes;
}

/**
 * For each type, registers as parent every type owning at least one triple
 * whose object type is that type, together with those triples.
 */
private static void linkParentTypes(List<RdfTypeInfo> rdfTypes) {
    for (RdfTypeInfo rti : rdfTypes) {
        for (RdfTypeInfo parent : rdfTypes) {
            List<TripleInfo> triples = parent.findTriplesWithObjectType(rti.getTypeName());
            if (!triples.isEmpty()) {
                rti.addParent(parent.getTypeName());
                for (TripleInfo triple : triples) {
                    rti.addParentTriple(triple);
                }
            }
        }
    }
}
/**
 * Recursively follows the triples of {@code currentEntry} and records, on every
 * type reached, the predicate path that led to it.
 *
 * A triple is followed only when its object type is a key of {@code fullMap},
 * its predicate is not already contained in the current path and its predicate
 * is not ":interaction". Recursion stops once the depth exceeds 20.
 *
 * @param fullMap      type descriptions indexed by type name
 * @param currentEntry type whose triples are explored
 * @param currentPath  path accumulated so far (used as prefix of the next path)
 * @param currenDepth  current recursion depth
 */
private static void buildPathToOrigin(final Map<String, RdfTypeInfo> fullMap, RdfTypeInfo currentEntry, String currentPath, int currenDepth) {
    if (currenDepth <= 20) {
        for (TripleInfo triple : currentEntry.getTriples()) {
            final RdfTypeInfo target = fullMap.get(triple.getObjectType());
            if (target == null) {
                continue;
            }
            final String predicate = triple.getPredicate();
            if (currentPath.contains(predicate) || predicate.equals(":interaction")) {
                continue;
            }
            final String extendedPath = currentPath + predicate;
            target.addPathToOrigin(extendedPath);
            buildPathToOrigin(fullMap, target, extendedPath + "/", currenDepth + 1);
        }
    }
}
/**
 * Builds the description of one RDF type: its properties (label, comment,
 * instance count and sample), its triples, a set of its values and, for each
 * literal triple whose object type is not RdfConstants.BLANK_OBJECT_TYPE, a set
 * of example values.
 *
 * Types listed in completeSetOfValuesForTypes get all their values (otherwise at
 * most 20 are requested); "type/predicate" keys listed in
 * completeSetOfValuesForLiteral get all their literal values (otherwise at most 50).
 *
 * @param rdfTypeName prefixed name of the RDF type
 * @return the populated description
 */
@Override
public RdfTypeInfo getRdfTypeFullInfo(String rdfTypeName) {
    RdfTypeInfo rdfTypeInfo = new RdfTypeInfo();
    rdfTypeInfo.setTypeName(rdfTypeName);

    Map<String, String> properties = getRdfTypeProperties(rdfTypeName);
    if (!properties.isEmpty()) {
        rdfTypeInfo.setTypeName(properties.get("rdfType"));
        rdfTypeInfo.setRdfsLabel(properties.get("label"));
        rdfTypeInfo.setRdfsComment(properties.get("comment"));
        // An unbound variable is stored as "" in the map; do not let the whole
        // type fail because of an unparsable count.
        String instanceCount = properties.get("instanceCount");
        try {
            rdfTypeInfo.setInstanceCount(Integer.valueOf(instanceCount));
        } catch (NumberFormatException e) {
            LOGGER.warn("No valid instance count for " + rdfTypeName + ": '" + instanceCount + "'");
        }
        rdfTypeInfo.setInstanceSample(properties.get("instanceSample"));
    }

    List<TripleInfo> triples = getTripleInfoList(rdfTypeInfo.getTypeName());

    int typeValuesLimit = completeSetOfValuesForTypes.contains(rdfTypeInfo.getTypeName()) ? Integer.MAX_VALUE : 20;
    rdfTypeInfo.setValues(getRdfTypeValues(rdfTypeName, typeValuesLimit));

    for (TripleInfo triple : triples) {
        rdfTypeInfo.addTriple(triple);
        if (triple.isLiteralType() && (!triple.getObjectType().equals(RdfConstants.BLANK_OBJECT_TYPE))) {
            String typeLiteral = rdfTypeName + "/" + triple.getPredicate();
            int literalValuesLimit = completeSetOfValuesForLiteral.contains(typeLiteral) ? Integer.MAX_VALUE : 50;
            triple.setValues(getValuesForTriple(rdfTypeName, triple.getPredicate(), literalValuesLimit));
        }
    }
    return rdfTypeInfo;
}
/**
 * Returns the values of the given RDF type, requested without an effective
 * limit (Integer.MAX_VALUE), as a new list.
 *
 * @param rdfTypeName prefixed name of the RDF type
 * @return a new list holding the values in the iteration order of the underlying set
 */
@Override
public List<String> getRdfTypeValues(String rdfTypeName) {
    final Set<String> allValues = getRdfTypeValues(rdfTypeName, Integer.MAX_VALUE);
    return new ArrayList<String>(allValues);
}
/**
 * Unit of work submitted to the executor: asks the given service for the full
 * description of a single RDF type.
 */
private static class FillRdfTypeInfoTask implements Callable<RdfTypeInfo> {

    private final RdfHelpService rdfTypeInfoService;
    private final String rdfType;

    /**
     * @param rdfTypeInfoService service used to build the description
     * @param rdfType            name of the RDF type to describe
     */
    public FillRdfTypeInfoTask(RdfHelpService rdfTypeInfoService, String rdfType) {
        this.rdfTypeInfoService = rdfTypeInfoService;
        this.rdfType = rdfType;
    }

    /** Logs the type name, then delegates to RdfHelpService#getRdfTypeFullInfo. */
    @Override
    public RdfTypeInfo call() {
        LOGGER.info("Calling " + rdfType);
        final RdfTypeInfo info = rdfTypeInfoService.getRdfTypeFullInfo(rdfType);
        return info;
    }
}
/**
 * Reads the optional system property "rdftype.max".
 *
 * @return the parsed property value, or -1 when the property is missing or
 *         empty (-1 meaning that all RDF types are retrieved)
 * @throws NumberFormatException if the property is set but is not an integer
 */
private int getMaxRdfTypes() {
    final String configured = System.getProperty("rdftype.max");
    final int limit;
    if (configured == null || configured.length() == 0) {
        limit = -1;
    } else {
        limit = Integer.parseInt(configured);
    }
    if (limit == -1) {
        LOGGER.info("Retrieving all RDF types");
    } else {
        LOGGER.info("Retrieving sample of RDF types, size = " + limit);
    }
    return limit;
}
/**
 * Runs the "alldistincttypes" query and returns the sorted set of RDF type
 * names to document.
 *
 * Names listed in RDF_TYPES_TO_EXCLUDE are dropped silently; names starting
 * with "http://", "owl:" or "rdfs:Class" are dropped with an info log. When
 * getMaxRdfTypes() returns a value other than -1, collection stops as soon as
 * that many names have been kept.
 *
 * @return the retained type names
 */
private Set<String> getRdfTypesNames() {
    Set<String> result = new TreeSet<String>();
    String query = sparqlDictionary.getSparqlWithPrefixes("alldistincttypes");
    // read the limit before opening the query so a malformed property cannot leak it
    int max = getMaxRdfTypes();
    boolean limited = max != -1;

    QueryExecution qExec = sparqlService.queryExecution(query);
    try {
        ResultSet rs = qExec.execSelect();
        // stop once exactly 'max' names are kept (the previous counter stopped at max - 1)
        while (rs.hasNext() && !(limited && result.size() >= max)) {
            String rdfTypeName = (String) getDataFromSolutionVar(rs.next(), "rdfType");
            if (RDF_TYPES_TO_EXCLUDE.contains(rdfTypeName)) {
                continue;
            }
            if (rdfTypeName.startsWith("http://") || rdfTypeName.startsWith("owl:") || rdfTypeName.startsWith("rdfs:Class")) {
                LOGGER.info("Skipping " + rdfTypeName);
                continue;
            }
            // Set.add returns false when the name was already present
            if (!result.add(rdfTypeName)) {
                LOGGER.warn(rdfTypeName + " is not unique");
            }
        }
    } finally {
        qExec.close();
    }
    LOGGER.info("RdfType found: " + result.size());
    return result;
}
/**
 * Runs the "typenames" query for the given RDF type (substituted for the
 * ":SomeRdfType" placeholder) and copies the first solution into a map.
 *
 * @param rdfType prefixed name of the RDF type
 * @return a map with the keys "rdfType", "label", "comment", "instanceCount"
 *         and "instanceSample", or an empty map when the query has no solution
 */
private Map<String, String> getRdfTypeProperties(String rdfType) {
    Map<String, String> properties = new HashMap<String, String>();
    String queryBase = sparqlDictionary.getSparqlOnly("typenames");
    String query = sparqlDictionary.getSparqlPrefixes();
    query += queryBase.replace(":SomeRdfType", rdfType);

    QueryExecution qExec = sparqlService.queryExecution(query);
    try {
        ResultSet rs = qExec.execSelect();
        if (rs.hasNext()) {
            QuerySolution sol = rs.next();
            properties.put("rdfType", (String) getDataFromSolutionVar(sol, "rdfType"));
            properties.put("label", (String) getDataFromSolutionVar(sol, "label"));
            properties.put("comment", (String) getDataFromSolutionVar(sol, "comment"));
            properties.put("instanceCount", (String) getDataFromSolutionVar(sol, "instanceCount"));
            properties.put("instanceSample", (String) getDataFromSolutionVar(sol, "instanceSample"));
        }
    } finally {
        // release the query even when execution or row conversion fails
        qExec.close();
    }
    return properties;
}
/**
 * Runs the "typepred" query for the given RDF type (substituted for the
 * ":SomeRdfType" placeholder) and converts each solution into a TripleInfo.
 *
 * When the object type variable is empty, the object type is derived from the
 * datatype of the sample object and the triple is flagged as literal.
 *
 * Any exception raised while querying or converting is logged and counted
 * through incrementErrors(); the triples collected so far are still returned.
 *
 * @param rdfType prefixed name of the RDF type
 * @return the triples, in the order defined by the TreeSet they are collected in
 */
@Override
public List<TripleInfo> getTripleInfoList(String rdfType) {
    String queryBase = sparqlDictionary.getSparqlWithPrefixes("typepred");
    Set<TripleInfo> tripleList = new TreeSet<TripleInfo>();
    QueryExecution qExec = null;
    try {
        String query = sparqlDictionary.getSparqlOnly("prefix");
        query += queryBase.replace(":SomeRdfType", rdfType);
        qExec = sparqlService.queryExecution(query);
        ResultSet rs = qExec.execSelect();
        while (rs.hasNext()) {
            QuerySolution sol = rs.next();
            TripleInfo ti = new TripleInfo();
            String pred = (String) getDataFromSolutionVar(sol, "pred");
            String sspl = (String) getDataFromSolutionVar(sol, "subjSample");
            String ospl = (String) getDataFromSolutionVar(sol, "objSample", true);
            String spl = sspl + " " + pred + " " + ospl + " .";
            ti.setTripleSample(spl);
            ti.setPredicate(pred);
            ti.setSubjectType((String) getDataFromSolutionVar(sol, "subjType"));
            String objectType = (String) getDataFromSolutionVar(sol, "objType");
            if (objectType.length() == 0) {
                objectType = getObjectTypeFromSample(sol, "objSample");
                ti.setLiteralType(true);
            }
            ti.setObjectType(objectType);
            ti.setTripleCount(Integer.valueOf((String) getDataFromSolutionVar(sol, "objCount")));
            LOGGER.info(ti);
            tripleList.add(ti);
        }
    } catch (Exception e) {
        incrementErrors();
        // single report through the logger (message and stack trace)
        LOGGER.error("Error with " + rdfType, e);
    } finally {
        // release the query on both the normal and the error path
        if (qExec != null) {
            try {
                qExec.close();
            } catch (RuntimeException e) {
                LOGGER.warn("Could not close query for " + rdfType, e);
            }
        }
    }
    return new ArrayList<TripleInfo>(tripleList);
}
/**
 * Runs the "typevalues" query for the given RDF type, with ":SomeRdfType" and
 * ":LimitResults" replaced by the arguments, and collects the "value" column
 * in a sorted set.
 *
 * Reading stops at the first value starting with "annotation:", which is added
 * as "Example: <value>". When the number of collected values equals
 * {@code limit}, the list is considered incomplete and is reduced to a single
 * "Example: <first value>" entry to keep the output small.
 *
 * @param rdfTypeInfoName prefixed name of the RDF type
 * @param limit           maximum number of results requested from the query
 * @return the collected values, or a single example entry
 */
private Set<String> getRdfTypeValues(String rdfTypeInfoName, int limit) {
    Set<String> values = new TreeSet<String>();
    //TODO add a method with a map of named parameters in the sparql dictionary
    String queryBase = sparqlDictionary.getSparqlOnly("typevalues");
    String query = sparqlDictionary.getSparqlPrefixes();
    query += queryBase.replace(":SomeRdfType", rdfTypeInfoName).replace(":LimitResults", String.valueOf(limit));

    QueryExecution qExec = sparqlService.queryExecution(query);
    try {
        ResultSet rs = qExec.execSelect();
        while (rs.hasNext()) {
            String value = (String) getDataFromSolutionVar(rs.next(), "value");
            if (value.startsWith("annotation:")) {
                values.add("Example: " + value);
                break;
            }
            values.add(value);
        }
    } finally {
        // release the query even when execution or row conversion fails
        qExec.close();
    }

    // Reduce the json if the list is not complete, just put a simple example.
    // The emptiness guard covers limit == 0, where there is no first element to show.
    if (!values.isEmpty() && values.size() == limit) {
        String sample1 = values.iterator().next();
        values.clear();
        values.add("Example: " + sample1);
    }
    return values;
}
/**
 * Runs the "getliteralvalues" query with ":SomeRdfType", ":SomePredicate" and
 * ":LimitResults" replaced by the arguments, and collects the "value" column
 * in a sorted set.
 *
 * When the number of collected values equals {@code limit}, the list is
 * considered incomplete and is reduced to a single "Example: <first value>"
 * entry to keep the output small.
 *
 * @param rdfTypeName prefixed name of the RDF type
 * @param predicate   predicate whose literal values are requested
 * @param limit       maximum number of results requested from the query
 * @return the collected values, or a single example entry
 */
private Set<String> getValuesForTriple(String rdfTypeName, String predicate, int limit) {
    Set<String> values = new TreeSet<String>();
    String queryBase = sparqlDictionary.getSparqlOnly("getliteralvalues");
    String query = sparqlDictionary.getSparqlPrefixes();
    query += queryBase.replace(":SomeRdfType", rdfTypeName).replace(":SomePredicate", predicate).replace(":LimitResults", String.valueOf(limit));

    QueryExecution qExec = sparqlService.queryExecution(query);
    try {
        ResultSet rs = qExec.execSelect();
        while (rs.hasNext()) {
            values.add((String) getDataFromSolutionVar(rs.next(), "value"));
        }
    } finally {
        // release the query even when execution or row conversion fails
        qExec.close();
    }

    // Reduce the json if the list is not complete, just put a simple example.
    // The emptiness guard covers limit == 0, where there is no first element to show.
    if (!values.isEmpty() && values.size() == limit) {
        String sample1 = values.iterator().next();
        values.clear();
        values.add("Example: " + sample1);
    }
    return values;
}
/*
 * Private helper methods
 */
/**
 * Reads variable {@code var} from a query solution with the quoting option of
 * the three-argument overload switched off.
 *
 * @param sol query solution to read from
 * @param var name of the variable
 * @return whatever the three-argument overload returns for {@code useQuotes == false}
 */
private Object getDataFromSolutionVar(QuerySolution sol, String var) {
    final boolean surroundLiteralsWithQuotes = false;
    return getDataFromSolutionVar(sol, var, surroundLiteralsWithQuotes);
}
/**
 * Reads variable {@code var} from a query solution and converts the node with
 * an RDFBasicVisitor configured with the SPARQL prefixes of the dictionary.
 *
 * @param sol       query solution to read from
 * @param var       name of the variable
 * @param useQuotes forwarded to RDFBasicVisitor#setSurroundLiteralStringWithQuotes
 * @return the value produced by the visitor, or the empty string when the
 *         variable has no node in the solution
 */
private Object getDataFromSolutionVar(QuerySolution sol, String var, boolean useQuotes) {
    final RDFNode node = sol.get(var);
    if (node != null) {
        final RDFBasicVisitor visitor = new RDFBasicVisitor(sparqlDictionary.getSparqlPrefixes());
        visitor.setSurroundLiteralStringWithQuotes(useQuotes);
        return node.visitWith(visitor);
    }
    return "";
}
/**
 * Derives an object type from the datatype URI of the literal bound to
 * {@code objSample}, converted to a prefixed name with the SPARQL prefixes of
 * the dictionary.
 *
 * @param sol       query solution holding the sample object
 * @param objSample name of the variable bound to the sample object
 * @return the prefixed datatype name, or RdfConstants.BLANK_OBJECT_TYPE when
 *         any exception occurs (the failure is logged as an error)
 */
private String getObjectTypeFromSample(QuerySolution sol, String objSample) {
    String objectType;
    try {
        final String datatypeUri = sol.getLiteral(objSample).getDatatypeURI();
        objectType = RdfPrefixUtils.getPrefixedNameFromURI(sparqlDictionary.getSparqlPrefixes(), datatypeUri);
    } catch (Exception e) {
        LOGGER.error("Failed for " + objSample, e);
        objectType = RdfConstants.BLANK_OBJECT_TYPE;
    }
    return objectType;
}
}