package org.phenoscape.io.nexml_1_0;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.apache.log4j.Logger;
import org.apache.xmlbeans.XmlAnySimpleType;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.bioontologies.obd.schema.pheno.PhenotypeDocument;
import org.nexml.x10.AbstractBlock;
import org.nexml.x10.AbstractChar;
import org.nexml.x10.AbstractObs;
import org.nexml.x10.AbstractObsMatrix;
import org.nexml.x10.AbstractObsRow;
import org.nexml.x10.AbstractState;
import org.nexml.x10.AbstractStates;
import org.nexml.x10.Annotated;
import org.nexml.x10.Dict;
import org.nexml.x10.NexmlDocument;
import org.nexml.x10.StandardCells;
import org.nexml.x10.StandardChar;
import org.nexml.x10.StandardFormat;
import org.nexml.x10.StandardStates;
import org.nexml.x10.Taxa;
import org.obo.datamodel.IdentifiedObject;
import org.obo.datamodel.OBOClass;
import org.obo.datamodel.OBOSession;
import org.obo.datamodel.impl.DanglingClassImpl;
import org.phenoscape.io.PhenoXMLAdapter;
import org.phenoscape.model.Character;
import org.phenoscape.model.DataSet;
import org.phenoscape.model.Phenotype;
import org.phenoscape.model.Specimen;
import org.phenoscape.model.State;
import org.phenoscape.model.Taxon;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class NeXMLReader_1_0 {
private final DataSet data = new DataSet();
private final NexmlDocument xmlDoc;
private final OBOSession session;
private final Set<String> danglers = new HashSet<String>();
private final Set<String> secondaryIDs = new HashSet<String>();
private String charactersBlockID = UUID.randomUUID().toString();
private final Map<String, State> allStates = new HashMap<String, State>();
public NeXMLReader_1_0(File aFile, OBOSession session) throws XmlException, IOException {
this.session = session;
this.xmlDoc = NexmlDocument.Factory.parse(aFile);
this.parseNeXML();
}
public NeXMLReader_1_0(Reader aReader, OBOSession session) throws XmlException, IOException {
this.session = session;
this.xmlDoc = NexmlDocument.Factory.parse(aReader);
this.parseNeXML();
}
public DataSet getDataSet() {
return this.data;
}
public NexmlDocument getXMLDoc() {
return this.xmlDoc;
}
public String getCharactersBlockID() {
return this.charactersBlockID;
}
/**
* Returns true if the reader had to create dangling terms for referenced IDs not found in the OBOSession.
*/
public boolean didCreateDanglers() {
return !this.danglers.isEmpty();
}
/**
* Returns the list of IDs referenced in the file that were not found in the OBOSession.
*/
public Collection<String> getDanglersList() {
return this.danglers;
}
/**
* Returns true if the reader had to find any referenced terms via a secondary ID.
*/
public boolean didMigrateSecondaryIDs() {
return !this.secondaryIDs.isEmpty();
}
/**
* Returns the list of IDs referenced in the file that were found to be secondary IDs.
*/
public Collection<String> getMigratedSecondaryIDsList() {
return this.secondaryIDs;
}
private void parseNeXML() {
final Dict metadata = NeXMLUtil_1_0.findOrCreateMetadataDict(this.xmlDoc);
this.parseMetadata(metadata);
for (AbstractBlock block : this.xmlDoc.getNexml().getCharactersArray()) {
if (block instanceof StandardCells) {
this.charactersBlockID = block.getId();
final StandardCells cells = (StandardCells)block;
this.parseStandardCells(cells);
final Taxa taxa = NeXMLUtil_1_0.findOrCreateTaxa(this.xmlDoc, cells.getOtus());
this.parseTaxa(taxa);
final AbstractObsMatrix abstractMatrix = cells.getMatrix();
if (abstractMatrix != null) {
this.parseMatrix(abstractMatrix);
}
break;
}
}
}
private void parseStandardCells(StandardCells standardCells) {
if (!(standardCells.getFormat() instanceof StandardFormat)) return;
final StandardFormat format = (StandardFormat)(standardCells.getFormat());
for (AbstractChar abstractChar : format.getCharArray()) {
if (!(abstractChar instanceof StandardChar)) continue;
final StandardChar standardChar = (StandardChar)abstractChar;
final Character newCharacter;
if (standardChar.getStates() != null) {
newCharacter = new Character(standardChar.getId(), standardChar.getStates());
} else {
newCharacter = new Character(standardChar.getId());
}
newCharacter.setLabel(standardChar.getLabel());
newCharacter.setComment(this.getComment(standardChar));
newCharacter.setFigure(this.getFigure(standardChar));
final AbstractStates states = NeXMLUtil_1_0.findOrCreateStates(format, newCharacter.getStatesNexmlID());
if (states instanceof StandardStates) {
for (AbstractState abstractState : states.getStateArray()) {
final State newState = new State(abstractState.getId());
newState.setSymbol(this.readSymbol(abstractState));
newState.setLabel(abstractState.getLabel());
newState.setComment(this.getComment(abstractState));
newState.setFigure(this.getFigure(abstractState));
this.allStates.put(newState.getNexmlID(), newState);
final Dict phenotypeDict = NeXMLUtil_1_0.findOrCreateDict(abstractState, "OBO_phenotype", abstractState.getDomNode().getOwnerDocument().createElement("any"));
final Element any = NeXMLUtil_1_0.getFirstChildWithTagName((Element)(phenotypeDict.getDomNode()), "any");
final Element phenoXML = NeXMLUtil_1_0.getFirstChildWithTagNameNS(any, "http://www.bioontologies.org/obd/schema/pheno", "phenotype");
if (phenoXML != null) {
try {
PhenotypeDocument xmlPhen = org.bioontologies.obd.schema.pheno.PhenotypeDocument.Factory.parse(phenoXML);
PhenoXMLAdapter adapter = new PhenoXMLAdapter(this.session);
List<Phenotype> phenotypes = adapter.parsePhenotype(xmlPhen.getPhenotype());
for (Phenotype phenotype : phenotypes) {
newState.addPhenotype(phenotype);
}
this.danglers.addAll(adapter.getDanglersList());
this.secondaryIDs.addAll(adapter.getMigratedSecondaryIDsList());
} catch (XmlException e) {
log().error("Failed to parse OBO phenotype", e);
}
}
newCharacter.addState(newState);
}
}
this.data.addCharacter(newCharacter);
}
}
/**
* The NeXML schema forces symbols to be an integer, but I
* believe this is incorrect. This method should allow us
* to read any kind of symbol.
*/
private String readSymbol(AbstractState state) {
final XmlAnySimpleType symbol = state.getSymbol();
final Attr attribute = (Attr)(symbol.getDomNode());
return attribute.getValue();
}
private void parseTaxa(Taxa taxa) {
for (org.nexml.x10.Taxon xmlTaxon : taxa.getOtuArray()) {
final Taxon newTaxon = new Taxon(xmlTaxon.getId());
newTaxon.setPublicationName((xmlTaxon.getLabel() == null || xmlTaxon.getLabel().equals("")) ? null : xmlTaxon.getLabel());
final Dict oboIDDict = NeXMLUtil_1_0.findOrCreateDict(xmlTaxon, "OBO_ID", xmlTaxon.getDomNode().getOwnerDocument().createElement("string"));
for (String id : oboIDDict.getStringArray()) {
final String taxonID = id.trim();
if ((taxonID != null) && (!taxonID.equals(""))) {
newTaxon.setValidName(this.getTerm(taxonID));
}
break; // there should only be one String element anyway
}
newTaxon.setComment(this.getComment(xmlTaxon));
newTaxon.setFigure(this.getFigure(xmlTaxon));
newTaxon.setMatrixTaxonName(this.getMatrixTaxon(xmlTaxon));
final Dict specimensDict = NeXMLUtil_1_0.findOrCreateDict(xmlTaxon, "OBO_specimens", xmlTaxon.getDomNode().getOwnerDocument().createElement("any"));
for (XmlObject xmlObj : specimensDict.getAnyArray()) {
final NodeList nodes = ((Element)(xmlObj.getDomNode())).getElementsByTagName("specimen");
for (int i = 0; i < nodes.getLength(); i++) {
final Specimen newSpecimen = newTaxon.newSpecimen();
final Element specimenXML = (Element)(nodes.item(i));
final String collectionID = specimenXML.getAttribute("collection");
if ((collectionID != null) && (!collectionID.equals(""))) {
newSpecimen.setCollectionCode(this.getTerm(collectionID));
}
newSpecimen.setCatalogID(specimenXML.getAttribute("accession"));
}
}
this.data.addTaxon(newTaxon);
}
}
private void parseMatrix(AbstractObsMatrix matrix) {
final Map<String, Map<String, State>> matrixMap = new HashMap<String, Map<String, State>>();
for (AbstractObsRow row : matrix.getRowArray()) {
final String otuID = row.getOtu();
if (otuID != null) {
final Map<String, State> currentTaxonMap = new HashMap<String, State>();
matrixMap.put(otuID, currentTaxonMap);
for (AbstractObs cell : row.getCellArray()) {
final String characterID = cell.getChar() != null ? cell.getChar().getStringValue() : null;
final String stateID = cell.getState() != null ? cell.getState().getStringValue() : null;
final State state = this.allStates.get(stateID);
if (characterID != null && state != null) {
currentTaxonMap.put(characterID, state);
}
}
}
}
this.data.setMatrixData(matrixMap);
}
private void parseMetadata(Dict metadataDict) {
final Element any = NeXMLUtil_1_0.getFirstChildWithTagName(((Element)(metadataDict.getDomNode())), "any");
if (any != null) {
final Element curators = NeXMLUtil_1_0.getFirstChildWithTagName(any, "curators");
this.data.setCurators(curators != null ? NeXMLUtil_1_0.getTextContent(curators) : null);
final Element publication = NeXMLUtil_1_0.getFirstChildWithTagName(any, "publication");
this.data.setPublication(publication != null ? NeXMLUtil_1_0.getTextContent(publication) : null);
final Element pubNotes = NeXMLUtil_1_0.getFirstChildWithTagName(any, "publicationNotes");
this.data.setPublicationNotes(pubNotes != null ? NeXMLUtil_1_0.getTextContent(pubNotes) : null);
}
}
private String getComment(Annotated node) {
final Dict commentDict = NeXMLUtil_1_0.findOrCreateDict(node, NeXMLUtil_1_0.COMMENT_KEY, node.getDomNode().getOwnerDocument().createElement("string"));
for (String comment : commentDict.getStringArray()) {
return comment;
}
return null;
}
private String getFigure(Annotated node) {
final Dict figureDict = NeXMLUtil_1_0.findOrCreateDict(node, NeXMLUtil_1_0.FIGURE_KEY, node.getDomNode().getOwnerDocument().createElement("string"));
for (String figure : figureDict.getStringArray()) {
return figure;
}
return null;
}
private String getMatrixTaxon(Annotated node) {
final Dict matrixTaxonDict = NeXMLUtil_1_0.findOrCreateDict(node, NeXMLUtil_1_0.MATRIX_TAXON_KEY, node.getDomNode().getOwnerDocument().createElement("string"));
for (String matrixTaxon : matrixTaxonDict.getStringArray()) {
return matrixTaxon;
}
return null;
}
private OBOClass getTerm(String id) {
final IdentifiedObject term = this.session.getObject(id);
if (term instanceof OBOClass) {
return (OBOClass)term;
} else {
final OBOClass altTerm = this.findTermByAltID(id);
if (altTerm != null) {
return altTerm;
} else {
log().warn("Term not found; creating dangler for " + id);
this.danglers.add(id);
final OBOClass dangler = new DanglingClassImpl(id.trim());
return dangler;
}
}
}
private OBOClass findTermByAltID(String id) {
log().debug("Called alt_id search");
final Collection<IdentifiedObject> terms = this.session.getObjects();
for (IdentifiedObject object : terms) {
if (object instanceof OBOClass) {
final OBOClass term = (OBOClass)object;
if (term.getSecondaryIDs().contains(id)) {
this.secondaryIDs.add(id);
return term;
}
}
}
return null;
}
private Logger log() {
return Logger.getLogger(this.getClass());
}
}