package com.github.lindenb.jvarkit.util.go; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.EndElement; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import com.github.lindenb.jvarkit.io.IOUtils; public class GoTree { private static final String NS="http://www.geneontology.org/dtds/go.dtd#"; private static final String PREFIX="http://www.geneontology.org/go#"; public static interface Term { public String getAcn(); public String getLabel(); public Set<Term> getParents(); public Set<Term> getChildren(); public boolean isDescendantOf(String acn); public boolean hasDescendant(String acn); //public Set<Term> getAllParents(); //public Set<Term> getAllChildren(); } private GoTree() { } public int size() { return uri2term.size(); } public List<? extends Term> getTerms() { return new ArrayList<Term>(uri2term.values()); } public void dump() { for(String s: uri2term.keySet()) { Term t=uri2term.get(s); System.out.println(s+" "+t.getAcn()+" "+t.getLabel()+" "+t.getChildren()+" "+t.getParents()); } } private HashMap<String, TermImpl> uri2term=new HashMap<String, TermImpl>(); private class TermImpl implements Term { String accession; String name; Set<String> parents=new HashSet<String>(); Set<String> children=new HashSet<String>(); @Override public String getAcn() { return accession; } @Override public String getLabel() { return name; } private Set<Term> convert(Set<String> S1) { Set<Term> S2=new HashSet<Term>(S1.size()); for(String s:S1) { Term t=uri2term.get(s); if(t==null) continue; S2.add(t); } return S2; } @Override public Set<Term> getChildren() { return convert(children); } @Override public Set<Term> getParents() { return convert(parents); } /* private void _getAllChildren(Set<String> seen) { for(String s:this.children) { TermImpl t=uri2term.get(s); if(t==null) continue; seen.add(s); t._getAllChildren(seen); } } private void _getAllParents(Set<String> seen) { for(String s:this.parents) { TermImpl t=uri2term.get(s); if(t==null) continue; seen.add(s); t._getAllParents(seen); } } public Set<Term> getAllChildren() { Set<String> seen=new HashSet<String>(); _getAllChildren(seen); return convert(seen); } //@Override public Set<Term> getAllParents() { Set<String> seen=new HashSet<String>(); _getAllParents(seen); return convert(seen); }*/ @Override public boolean isDescendantOf(String parentAcn) { if(parentAcn.equals(this.accession)) return true; if(!parentAcn.startsWith(PREFIX)) parentAcn=PREFIX+parentAcn; for(String p:this.parents) { TermImpl pNode=uri2term.get(p); if(pNode==null || pNode==this) continue; if(pNode.isDescendantOf(parentAcn)) return true; } return false; } @Override public boolean hasDescendant(String descendantAcn) { if(descendantAcn.equals(this.accession)) return true; if(!descendantAcn.startsWith(PREFIX)) descendantAcn=PREFIX+descendantAcn; for(String p:this.children) { TermImpl pNode=uri2term.get(p); if(pNode==null || pNode==this) continue; if(pNode.hasDescendant(descendantAcn)) return true; } return false; } @Override public int hashCode() { return accession.hashCode(); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; TermImpl other = (TermImpl) obj; return accession.equals(other.accession); } @Override public String toString() { return accession; } } public Term getTermByAccession(String s) { if(!s.startsWith(PREFIX)) s=PREFIX+s; return (Term)uri2term.get(s); } private static final QName rdfAbout=new QName("http://www.w3.org/1999/02/22-rdf-syntax-ns#","about","rdf"); private static final QName rdfRsrc=new QName("http://www.w3.org/1999/02/22-rdf-syntax-ns#","resource","rdf"); private void parseTerm(StartElement root,XMLEventReader r) throws IOException,XMLStreamException { Attribute aboutAtt=root.getAttributeByName(rdfAbout); if(aboutAtt==null) { throw new IOException("no rdf:about"); } TermImpl term=uri2term.get(aboutAtt.getValue()); if(term==null) { term=new TermImpl(); term.accession=aboutAtt.getValue(); if(term.accession.startsWith(PREFIX)) { term.accession=term.accession.substring(PREFIX.length()); } term.name=term.accession; uri2term.put(aboutAtt.getValue(),term); } while(r.hasNext()) { XMLEvent evt=r.nextEvent(); if(evt.isStartElement()) { StartElement E=evt.asStartElement(); QName qN=E.getName(); if( NS.equals(qN.getNamespaceURI())) { if(qN.getLocalPart().equals("accession")) { term.accession=r.getElementText(); } else if(qN.getLocalPart().equals("name")) { term.name=r.getElementText(); } else if(qN.getLocalPart().equals("is_a")) { Attribute rsrc=E.getAttributeByName(rdfRsrc); if(rsrc==null) throw new IOException("att missing "+rdfRsrc+" for "+aboutAtt.getValue()); String parentUri=rsrc.getValue(); term.parents.add(parentUri); TermImpl parentTerm=this.uri2term.get(parentUri); if(parentTerm==null) { parentTerm=new TermImpl(); parentTerm.accession=parentUri; if(parentTerm.accession.startsWith(PREFIX)) { parentTerm.accession=parentTerm.accession.substring(PREFIX.length()); } parentTerm.name=parentTerm.accession; uri2term.put(parentUri,parentTerm); } parentTerm.children.add(aboutAtt.getValue()); } } } else if(evt.isEndElement()) { EndElement E=evt.asEndElement(); QName qN=E.getName(); if(qN.getLocalPart().equals("term") && NS.equals(qN.getNamespaceURI())) { break; } } } } public static GoTree parse(Reader xmlIn) throws IOException,XMLStreamException { GoTree tree=new GoTree(); XMLInputFactory fact=XMLInputFactory.newFactory(); fact.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE); XMLEventReader r=fact.createXMLEventReader(xmlIn); while(r.hasNext()) { XMLEvent evt=r.nextEvent(); if(evt.isStartElement()) { StartElement E=evt.asStartElement(); QName qN=E.getName(); if(qN.getLocalPart().equals("term") && NS.equals(qN.getNamespaceURI())) { tree.parseTerm(E,r); } } } r.close(); return tree; } public static GoTree parse(File file) throws IOException,XMLStreamException { BufferedReader r=IOUtils.openFileForBufferedReading(file); GoTree t=parse(r); r.close(); return t; } public static GoTree parse(String uri) throws IOException,XMLStreamException { BufferedReader r=IOUtils.openURIForBufferedReading(uri); GoTree t=parse(r); r.close(); return t; } }