package org.nextprot.api.core.utils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nextprot.api.commons.constants.TerminologyCv;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.commons.utils.Tree;
import org.nextprot.api.commons.utils.Tree.Node;
import org.nextprot.api.core.domain.CvTerm;
import org.nextprot.api.core.domain.DbXref;
import org.nextprot.api.core.domain.Terminology;
import org.nextprot.api.core.service.TerminologyService;
import org.nextprot.api.core.utils.graph.OntologyDAG;
import java.util.*;
import java.util.stream.Collectors;
//import org.nextprot.api.core.domain.TerminologyProperty;
public class TerminologyUtils {
private static final Log LOGGER = LogFactory.getLog(TerminologyUtils.class);
public static List<CvTerm.TermProperty> convertToProperties(String propliststring, Long termid, String termacc) {
if (propliststring == null) return null;
// Decomposes a pipe-separated string (generated by a SQL query) in a list property objects containing name/value pairs
List<CvTerm.TermProperty> properties = new ArrayList<CvTerm.TermProperty>();
// keep spaces in splitting pattern, since pipe alone can occur within fields
List<String> allprop = Arrays.asList(propliststring.split(" \\| "));
for (String propertystring : allprop) {
// The splitter is ':=' since both ':' and '=' can occur alone within field
List<String> currprop = Arrays.asList(propertystring.split(":="));
if(currprop.size() != 2) {
String msg = "Problem with property in " + termacc + ": " + propertystring + " propString: " + propertystring;
LOGGER.warn(msg);
System.err.println(msg);
continue;
}
CvTerm.TermProperty property = new CvTerm.TermProperty();
String propertyName = currprop.get(0) ;
property.setPropertyName(propertyName);
String propertyValue = currprop.get(1);
property.setPropertyValue(propertyValue);
property.settermId(termid);
properties.add(property);
}
return properties;
}
/**
* Get all ancestors of the given cvterm
*
* @param cvTermAccession the cvterm accession
* @param terminologyservice the terminology service
* @return a list of cvterm ancestor accessions
*/
public static List<String> getAllAncestorsAccession(String cvTermAccession, TerminologyService terminologyservice) {
return getAllAncestorTerms(cvTermAccession, terminologyservice).stream()
.map(t->t.getAccession())
.collect(Collectors.toList());
}
public static List<CvTerm> getAllAncestorTerms(String cvTermAccession, TerminologyService terminologyservice) {
CvTerm cvTerm = terminologyservice.findCvTermByAccession(cvTermAccession);
OntologyDAG graph = terminologyservice.findOntologyGraph(TerminologyCv.valueOf(cvTerm.getOntology()));
return Arrays.stream(graph.getAncestors(cvTerm.getId())).boxed()
.map(graph::getCvTermAccessionById)
.map(ac->terminologyservice.findCvTermByAccession(ac))
.collect(Collectors.toList());
}
/**
* Returns an ordered list of terms.
* The first term is the Term identified with the parameter cvTermAccession
* The next term is a parent of the previous term until we reach the root term
* A known limitation is that only the first parent is retrieved for each term !
*
* @param cvTermAccession
* @param terminologyservice
* @return
*/
public static List<CvTerm> getOnePathToRootTerm(String cvTermAccession, TerminologyService terminologyservice) {
List<CvTerm> path = new ArrayList<CvTerm>();
String ac = cvTermAccession;
while (true) {
CvTerm t = terminologyservice.findCvTermByAccession(ac);
path.add(t);
List<String> parents = t.getAncestorAccession();
if (parents==null || parents.size()==0) break;
ac = parents.get(0);
}
return path;
}
/**
* @deprecated use #getAllAncestors() instead
*/
@Deprecated
public static List<String> getAllAncestorsOld(String cvterm, TerminologyService terminologyservice) {
Set<String> finalSet = new TreeSet<String>();
Set<String> multiParentSet = new TreeSet<String>();
Set<String> multiSetCurrent = new TreeSet<String>();
List<String> mylist = Arrays.asList("XXX");
String currTerm = cvterm;
while(!mylist.isEmpty()) {
CvTerm cvt = terminologyservice.findCvTermByAccession(currTerm);
if (cvt == null ) {
LOGGER.error(cvterm + " does not exist");
break;
}
mylist = cvt.getAncestorAccession();
if(mylist == null) break;
if(mylist.size() > 1) for (int i=1; i<mylist.size(); i++) multiParentSet.add(mylist.get(i));
// when root loop on itself !
if (currTerm.equals(mylist.get(0)))
break;
currTerm = mylist.get(0);
finalSet.add(currTerm);
}
while(!multiParentSet.isEmpty()) {
multiSetCurrent.clear();
multiSetCurrent.addAll(multiParentSet);
for(String cv : multiSetCurrent) {
finalSet.add(cv);
multiParentSet.remove(cv);
CvTerm cvt = terminologyservice.findCvTermByAccession(cv);
if (cvt == null ) {
LOGGER.error(cv + " does not exist");
break;
}
mylist = cvt.getAncestorAccession();
if(mylist == null) break;
while(mylist != null && !mylist.isEmpty()) {
if(mylist.size() > 1)
for (int i=1; i<mylist.size(); i++)
multiParentSet.add(mylist.get(i));
// when root loop on itself !
if (currTerm.equals(mylist.get(0)))
break;
currTerm = mylist.get(0);
finalSet.add(currTerm);
mylist = terminologyservice.findCvTermByAccession(currTerm).getAncestorAccession();
}
}
}
return(new ArrayList<>(finalSet));
}
public static List<DbXref> convertToXrefs (String xrefsstring) {
if (xrefsstring == null) return null;
// Builds DbXref list from String of xrefs formatted as "dbcat, db, acc, linkurl" quartetss separated by pipes
List<DbXref> xrefs = new ArrayList<>();
List<String> allxrefs = Arrays.asList(xrefsstring.split(" \\| "));
for (String onexref: allxrefs) {
List<String> fields = Arrays.asList(onexref.split("\\^ "));
DbXref dbref = new DbXref();
dbref.setDatabaseCategory(fields.get(0));
dbref.setDatabaseName(fields.get(1));
dbref.setAccession(fields.get(2));
dbref.setDbXrefId(Long.parseLong(fields.get(3)));
String url = null;
String linkurl = null;
if (fields.size() > 4) {
url = fields.get(4);
if (fields.size() > 5)
linkurl = fields.get(5);
}
if (url == null || url.isEmpty() || "none".equalsIgnoreCase(url)) {
dbref.setUrl("None");
dbref.setLinkUrl("None");
}
else {
dbref.setUrl(url);
dbref.setLinkUrl(linkurl);
}
xrefs.add(dbref);
}
return xrefs;
}
public static String convertPropertiesToString(List<CvTerm.TermProperty> properties) {
if (properties == null) return null;
// Build a String where propertyname/propertyvalue pairs are separated by pipes
StringBuilder sb = new StringBuilder();
int i = properties.size();
for (CvTerm.TermProperty property : properties) {
sb.append(property.getPropertyName());
//sb.append(":=");
sb.append(":");
sb.append(property.getPropertyValue());
if(--i != 0)
sb.append(" | ");
}
return sb.toString();
}
public static String convertXrefsToString(List<DbXref> xrefs) {
if (xrefs == null) return null;
// Build a String of xrefs formatted as "dbcat, db:acc" pairs separated by pipes
StringBuilder sb = new StringBuilder();
int i = xrefs.size();
for (DbXref xref : xrefs) {
sb.append(xref.getDatabaseCategory());
sb.append(", ");
sb.append(xref.getDatabaseName());
sb.append(":");
sb.append(xref.getAccession());
if(--i != 0)
sb.append(" | ");
}
return sb.toString();
}
public static String convertXrefsToSolrString(List<DbXref> xrefs) {
if (xrefs == null) return null;
// Build a String of xrefs for solr formatted as "acc, db:acc" pairs separated by pipes
StringBuilder sb = new StringBuilder();
int i = xrefs.size();
for (DbXref xref : xrefs) {
sb.append(xref.getAccession());
sb.append(", ");
sb.append(xref.getDatabaseName());
sb.append(":");
sb.append(xref.getAccession());
if(--i != 0)
sb.append(" | ");
}
return sb.toString();
}
public static List<String> convertXrefsToSameAsStrings(List<DbXref> xrefs) {
if (xrefs == null) return null;
// Build List of strings of xref accessions as needed for the old Terminology.getSameAs method
List<String> sameas = new ArrayList<String>();
for (DbXref xref : xrefs) {
sameas.add(xref.getAccession());
}
return sameas;
}
public static Map<String, CvTerm> convertToTerminologyMap(List<CvTerm> terms) {
Map<String, CvTerm> termMap = new HashMap<>();
for(CvTerm term: terms){
termMap.put(term.getAccession(), term);
}
return termMap;
}
public static Terminology convertCvTermsToTerminology(List<CvTerm> terms, final int maxDepth) {
String topLevelTermPrefix = "CVAN";
Terminology terminology = new Terminology();
for(CvTerm term: terms){
//System.err.println(term.getAccession() + " " + term.getAncestorAccession());
if((term.getAncestorAccession() == null) || (term.getAncestorAccession().isEmpty())){ //root
terminology.addTreeRoot(term);
} else {
//For example DO-00218 from the terminology NextprotDomain, has as ancestor the top level terminology cv annotation (CVAN)
boolean localRoot = false;
if(!term.getAccession().startsWith(topLevelTermPrefix)){ //TOP Level domain (case where other terminologies link to this one)
for(String ancestorAccession : term.getAncestorAccession()){
if(ancestorAccession.startsWith(topLevelTermPrefix)){
localRoot = true;
break;
}
}
}
if(localRoot){
terminology.addTreeRoot(term);
}
}
}
for(Tree<CvTerm> tree : terminology){
populateTree(tree.getRoot(), convertToTerminologyMap(terms), 0, maxDepth);
}
return terminology;
}
static void populateTree(Tree.Node<CvTerm> currentNode, Map<String, CvTerm> termMap, int depth, final int maxDepth) {
if(depth > maxDepth) return;
if(depth > 100) throw new NextProtException("Getting stuck in building graph");
if(currentNode.getValue() == null || currentNode.getValue().getChildAccession() == null || currentNode.getValue().getChildAccession().isEmpty()) {
return;
}
for(String childAccession : currentNode.getValue().getChildAccession()){
CvTerm childTerm = termMap.get(childAccession);
if(childTerm != null) { // may be null in case of the terminology being another one like DO
if(currentNode.getChildren() == null){
currentNode.setChildren(new ArrayList<Tree.Node<CvTerm>>());
}
Tree.Node<CvTerm> childNode = new Tree.Node<CvTerm>(childTerm);
childNode.setParents(Arrays.asList(currentNode));
currentNode.getChildren().add(childNode);
populateTree(childNode, termMap, depth+1, maxDepth);
}
}
}
public static List<Node<CvTerm>> getNodeListByName(Tree<CvTerm> tree, String accession) {
List<Node<CvTerm>> result = new ArrayList<>();
getNodeListByNameAndPopulateResult(result, tree.getRoot(), accession);
return result;
}
private static void getNodeListByNameAndPopulateResult(List<Node<CvTerm>> currentResult, Node<CvTerm> node, String accession) {
if(node.getValue().getAccession().equals(accession)){
currentResult.add(node);
return;
}
if(node.getChildren() != null && !node.getChildren().isEmpty()){
for(Node<CvTerm> child : node.getChildren()){
getNodeListByNameAndPopulateResult(currentResult, child, accession);
}
}
}
}