/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.shared.helpers;

import java.io.FileInputStream;
import java.util.Stack;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.XMLEvent;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.util.JSON;

/**
 * Streams an XML file and prints one tab-separated line per child element of
 * the document root.
 *
 * <p>Every element below the root is converted into a {@link DBObject}:
 * child elements become fields keyed by their local name, repeated child
 * names are collected into a {@link BasicDBList}, and an element that holds
 * nothing but text is stored as a plain string. Children of the root whose
 * tag equals {@code rowTag} are written to standard output as
 * {@code DBS, id, InChI-or-chemTag-value, JSON}; any other child of the root
 * is written to standard error as an "UNKNOWN" row.
 *
 * <p>XML attributes are not read; only element names and text are used.
 */
public class XMLToImportantChemicals {
  /** Field under which the text content of an element is accumulated. */
  private static final String strTag = "____txt_";

  private final String xmlfile;
  private final String rowTag;
  private final String idTag;
  private final String chemTag;
  private final String DBS;

  /**
   * @param f       path of the XML file to read
   * @param DBS     label printed in the first column of every row
   * @param rowTag  tag name of the root's children that are regular entries
   * @param idTag   field of an entry whose string value is printed as the id
   * @param chemTag field of an entry whose string value is printed in the
   *                third column when no InChI is found
   */
  public XMLToImportantChemicals(String f, String DBS, String rowTag, String idTag, String chemTag) {
    this.xmlfile = f;
    this.rowTag = rowTag;
    this.idTag = idTag;
    this.chemTag = chemTag;
    this.DBS = DBS;
  }

  /**
   * Parses the configured file and prints its entries.
   *
   * <p>On any failure the stack trace and an error line are printed and the
   * JVM exits with status -1. The input stream and the XML reader are closed
   * before that happens.
   */
  public void process() {
    try {
      XMLInputFactory factory = XMLInputFactory.newInstance();
      // The input is a data dump, not a document we author: never resolve
      // external entities while reading it.
      factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

      FileInputStream fileInputStream = new FileInputStream(this.xmlfile);
      try {
        XMLStreamReader xml = factory.createXMLStreamReader(fileInputStream);
        try {
          process(xml);
        } finally {
          // XMLStreamReader.close() does not close the underlying stream,
          // so both need to be released explicitly.
          xml.close();
        }
      } finally {
        fileInputStream.close();
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      System.out.println("XML Reading error:" + this.xmlfile);
      System.exit(-1);
    }
  }

  /**
   * Walks the event stream, keeping one partially built object per open
   * element below the root on a stack.
   *
   * @param xml reader positioned at the start of the document
   * @throws XMLStreamException if the underlying parser fails
   */
  private void process(XMLStreamReader xml) throws XMLStreamException {
    boolean rootSeen = false;
    Stack<DBObject> json = new Stack<DBObject>();

    while (xml.hasNext()) {
      int eventType = xml.next();
      if (xml.isWhiteSpace() || eventType == XMLEvent.SPACE) {
        // Whitespace-only events are skipped; going back through the loop
        // condition keeps the hasNext() check in front of every next().
        continue;
      }

      switch (eventType) {
        case XMLEvent.START_ELEMENT:
          if (rootSeen) {
            json.push(new BasicDBObject());
          } else {
            // The first element is the document root; it gets no object.
            rootSeen = true;
          }
          break;

        case XMLEvent.END_ELEMENT:
          closeElement(json, xml.getLocalName());
          break;

        case XMLEvent.CHARACTERS:
          appendText(json, xml.getText());
          break;

        case XMLEvent.START_DOCUMENT:
        case XMLEvent.END_DOCUMENT:
          break;

        case XMLEvent.COMMENT:
        case XMLEvent.ENTITY_REFERENCE:
        case XMLEvent.ATTRIBUTE:
        case XMLEvent.PROCESSING_INSTRUCTION:
        case XMLEvent.DTD:
        case XMLEvent.CDATA:
        case XMLEvent.SPACE:
          System.out.format("%s --\n", eventType);
          break;

        default:
          break;
      }
    }
  }

  /**
   * Handles an END_ELEMENT event: finishes the object on top of the stack and
   * either prints it (child of the root) or attaches it to its parent.
   */
  private void closeElement(Stack<DBObject> json, String tag) {
    if (json.isEmpty()) {
      // Only the root has no object on the stack. Checking the stack instead
      // of the tag name keeps nested elements that share the root's name
      // balanced.
      return;
    }

    DBObject finished = json.pop();
    if (!json.isEmpty()) {
      putListStrOrJSON(json.peek(), tag, finished);
    } else if (tag.equals(rowTag)) {
      printEntry(finished);
    } else {
      printUnwantedEntry(finished);
    }
  }

  /**
   * Handles a CHARACTERS event by appending the text to the innermost open
   * element. Text that sits directly under the root has no owning object and
   * is ignored.
   */
  private void appendText(Stack<DBObject> json, String txt) {
    if (json.isEmpty()) {
      return;
    }

    DBObject current = json.peek();
    if (current.containsField(strTag)) {
      txt = current.get(strTag) + txt;
      // Remove-then-put is kept from the original; presumably it re-appends
      // the key so the serialized field order is unchanged (TODO confirm).
      current.removeField(strTag);
    }
    current.put(strTag, txt);
  }

  /**
   * Adds {@code toAdd} to {@code json} under {@code tag}; an object that only
   * holds text is added as that bare string rather than as a wrapper object.
   */
  private void putListStrOrJSON(DBObject json, String tag, DBObject toAdd) {
    boolean textOnly = toAdd.keySet().size() == 1 && toAdd.containsField(strTag);
    if (textOnly) {
      putElemOrList(json, tag, toAdd.get(strTag));
    } else {
      putElemOrList(json, tag, toAdd);
    }
  }

  /**
   * Stores {@code add} under {@code tag}. The first value for a tag is stored
   * as is; from the second value on the field is a {@link BasicDBList} of all
   * values in arrival order.
   */
  private void putElemOrList(DBObject json, String tag, Object add) {
    if (!json.containsField(tag)) {
      json.put(tag, add);
      return;
    }

    Object already = json.get(tag);
    BasicDBList values;
    if (already instanceof BasicDBList) {
      values = (BasicDBList) already;
    } else {
      values = new BasicDBList();
      values.add(already);
    }
    values.add(add);

    // Remove-then-put kept from the original (see appendText).
    json.removeField(tag);
    json.put(tag, values);
  }

  /**
   * Prints one entry to standard output as
   * {@code DBS <tab> id <tab> InChI-or-chemTag-value <tab> JSON}. The id and
   * the chemTag value are printed as empty strings when they are missing or
   * not plain strings.
   */
  private void printEntry(DBObject json) {
    Object ido = json.get(idTag);
    Object caso = json.get(chemTag);
    String id = ido instanceof String ? (String) ido : "";
    String cas = caso instanceof String ? (String) caso : "";

    String inchi = getInchi(json);
    String chemical = inchi != null ? inchi : cas;
    System.out.format("%s\t%s\t%s\t%s\n", this.DBS, id, chemical, JSON.serialize(json));
  }

  /**
   * Looks up the InChI under
   * {@code calculated-properties.property[{kind=InChI, value=<inchi>}]}.
   *
   * <p>{@code property} is a list when the entry had several property
   * elements and a single object when it had exactly one (see
   * {@link #putElemOrList}); both shapes are searched. Values of an
   * unexpected type are skipped instead of causing a cast failure.
   *
   * @return the first InChI string found, or {@code null} if there is none
   */
  private String getInchi(DBObject json) {
    if (!json.containsField("calculated-properties")) {
      return null;
    }
    Object calculated = json.get("calculated-properties");
    if (!(calculated instanceof DBObject)) {
      return null;
    }

    DBObject calculatedProps = (DBObject) calculated;
    if (!calculatedProps.containsField("property")) {
      return null;
    }
    Object properties = calculatedProps.get("property");

    if (properties instanceof BasicDBList) {
      for (Object property : (BasicDBList) properties) {
        String inchi = inchiOf(property);
        if (inchi != null) {
          return inchi;
        }
      }
      return null;
    }
    return inchiOf(properties);
  }

  /**
   * Returns the {@code value} of a single property object when its
   * {@code kind} is "InChI" and the value is a string; {@code null} otherwise.
   */
  private String inchiOf(Object property) {
    if (!(property instanceof DBObject)) {
      return null;
    }
    DBObject kv = (DBObject) property;
    if (!kv.containsField("kind") || !kv.containsField("value")) {
      return null;
    }
    Object value = kv.get("value");
    if ("InChI".equals(kv.get("kind")) && value instanceof String) {
      return (String) value;
    }
    return null;
  }

  /**
   * Prints a child of the root whose tag is not {@code rowTag} to standard
   * error, using placeholder values for the first three columns.
   */
  private void printUnwantedEntry(DBObject json) {
    System.err.format("%s\t%s\t%s\t%s\n", "UNKNOWN", "ID?", "CAS?", JSON.serialize(json));
  }
}