/*******************************************************************************
 * Copyright 2014 Virginia Polytechnic Institute and State University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package edu.vt.vbi.patric.common;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import javax.xml.namespace.QName;

import org.apache.axiom.om.OMAbstractFactory;
import org.apache.axiom.om.OMElement;
import org.apache.axiom.om.OMFactory;
import org.apache.axiom.om.OMNamespace;
import org.apache.axis2.AxisFault;
import org.apache.axis2.addressing.EndpointReference;
import org.apache.axis2.client.Options;
import org.apache.axis2.client.ServiceClient;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

/**
 * Client for the KLEIO SOAP service. Each public query method builds an Axiom
 * request element, sends it through an Axis2 {@link ServiceClient} and converts
 * the response into json-simple objects.
 *
 * <p>
 * The instance keeps the facet selections parsed from the most recent query
 * (see {@link #keepState(String, String)}), so it holds per-request state.
 * NOTE(review): nothing here synchronises access to that state - presumably
 * one instance is used per request; confirm against callers.
 * </p>
 */
// json-simple's JSONObject/JSONArray are raw collections, and some Axiom
// iterator accessors are untyped, hence the class-wide suppression.
@SuppressWarnings("unchecked")
public class KLEIOInterface {

	/** Child element names read from facet value entries. */
	private static final QName QN_NAME = new QName("", "name");

	private static final QName QN_FREQ = new QName("", "freq");

	/** Facets returned by the service that are never included in the facet tree. */
	private static final Set<String> HIDDEN_FACETS = new HashSet<String>(Arrays.asList("HUMAN_PHENOM", "GENERAL_PHENOM",
			"NATURAL_PHENOM", "PUBLICATIONTYPE"));

	// NOTE(review): access key is hard-coded in source; consider moving it to
	// external configuration.
	private final String kleioKey = "578e2b24fcf9c04e5464384a1ee7836e5e2327596c586189f76e2da0";

	private final String endpointUrl = "http://nactem4.mc.man.ac.uk:18080/Kleio/services/KleioPort";

	ServiceClient client = null;

	private OMFactory omFactory = null;

	private OMNamespace nsWeb = null;

	private OMNamespace nsXsi = null;

	/** Comma separated list of the facet names recognised in a query string. */
	private final String facets = "GENE,PROTEIN,MESHHEADING,METABOLITE,DRUG,BACTERIA,SYMPTOM,DISEASE,ORGAN,DIAG_PROC,THERAPEUTIC_PROC,INDICATOR";

	/** The entries of {@link #facets} as a set, used for exact-name matching. */
	private final Set<String> facetNames = new HashSet<String>(Arrays.asList(facets.split(",")));

	/**
	 * Selections parsed from the most recent query: facet name (or
	 * {@code "content"} for free text) mapped to its values joined with
	 * {@code ";;"}.
	 */
	private Map<String, String> hash = new HashMap<String, String>();

	/**
	 * Creates the service client (300 second timeout) and the Axiom factory and
	 * namespaces used to build requests. An {@link AxisFault} during set-up is
	 * printed and otherwise ignored, in which case the instance is left only
	 * partially initialised.
	 */
	public KLEIOInterface() {
		try {
			client = new ServiceClient();
			Options opts = new Options();
			opts.setTo(new EndpointReference(endpointUrl));
			opts.setTimeOutInMilliSeconds(300000);
			client.setOptions(opts);

			omFactory = OMAbstractFactory.getOMFactory();
			nsWeb = omFactory.createOMNamespace("http://webservice.kleio.nactem.ac.uk/", "web");
			nsXsi = omFactory.createOMNamespace("http://www.w3.org/2001/XMLSchema-instance", "xsi");
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
	}

	/**
	 * @return a {@code <key>} element carrying the access key
	 */
	private OMElement getKLEIOKey() {
		OMElement e = omFactory.createOMElement("key", null);
		e.setText(kleioKey);
		return e;
	}

	/**
	 * Builds an optional search-criteria element.
	 *
	 * @param criteria element name; also selects the default (case-insensitively)
	 * @param value text of the element, or {@code null} to use the default for
	 *            {@code sortBy}, {@code noNullAbstract}, {@code startAt} or
	 *            {@code hitsPerPage}
	 * @return the criteria element
	 */
	private OMElement getOptionalCriteria(String criteria, String value) {
		OMElement e = omFactory.createOMElement(criteria, null);
		if (value == null) {
			if (criteria.equalsIgnoreCase("sortby")) {
				value = "RELEVANCE";
			}
			else if (criteria.equalsIgnoreCase("nonullabstract")) {
				value = "false";
			}
			else if (criteria.equalsIgnoreCase("startat")) {
				value = "0";
			}
			else if (criteria.equalsIgnoreCase("hitsperpage")) {
				value = "10";
			}
		}
		e.addChild(omFactory.createOMText(value));
		return e;
	}

	/**
	 * Replaces every character above code point 127, and each of {@code "},
	 * {@code <} and {@code >}, with a decimal numeric character reference
	 * ({@code &#NNN;}). All other characters, including {@code &}, are copied
	 * unchanged.
	 *
	 * @param text text to escape
	 * @return the escaped text
	 */
	private static String escapeMarkup(String text) {
		StringBuilder escaped = new StringBuilder(text.length());
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (c > 127 || c == '"' || c == '<' || c == '>') {
				escaped.append("&#").append((int) c).append(';');
			}
			else {
				escaped.append(c);
			}
		}
		return escaped.toString();
	}

	/**
	 * Upper-cases the first character of {@code text} and lower-cases the rest.
	 * An empty string is returned unchanged.
	 */
	private static String capitalize(String text) {
		if (text.length() == 0) {
			return text;
		}
		return text.substring(0, 1).toUpperCase() + text.substring(1).toLowerCase();
	}

	/**
	 * Sends a {@code getDocumentList} request.
	 *
	 * @param query query string, see {@link #getQueryNodes(String)} for the syntax
	 * @param sortby sort order; {@code null} selects {@code RELEVANCE}
	 * @param nullabstract value sent as the {@code noNullAbstract} criterion
	 * @param startat value sent as the {@code startAt} criterion
	 * @param hitsperpage value sent as the {@code hitsPerPage} criterion
	 * @return an object with {@code "total"} (text of {@code totalHits}) and
	 *         {@code "result"} (array of articles with pmid, title, date,
	 *         journal, snippet and authors); empty if the call raised an
	 *         {@link AxisFault}
	 */
	public JSONObject getDocumentList(String query, String sortby, boolean nullabstract, int startat, int hitsperpage) {
		JSONObject r = new JSONObject();
		JSONArray articles = new JSONArray();

		OMElement req = omFactory.createOMElement("getDocumentList", nsWeb);
		req.addChild(getKLEIOKey());
		req.addChild(getQueryNodes(query));
		// optional
		req.addChild(getOptionalCriteria("sortBy", sortby));
		req.addChild(getOptionalCriteria("noNullAbstract", "" + nullabstract));
		req.addChild(getOptionalCriteria("startAt", "" + startat));
		req.addChild(getOptionalCriteria("hitsPerPage", "" + hitsperpage));

		QName qnTotal = new QName("", "totalHits");
		QName qnCitations = new QName("", "citations");
		QName qnDate = new QName("", "dateString");
		QName qnJournal = new QName("", "journalCite");
		QName qnSnippet = new QName("", "docSnippet");
		QName qnPMID = new QName("", "pmid");
		QName qnTitle = new QName("", "title");
		QName qnAuthors = new QName("", "authors");

		try {
			OMElement res = client.sendReceive(req);
			OMElement rtrn = res.getFirstElement();

			OMElement totalHits = rtrn.getFirstChildWithName(qnTotal);
			System.out.println(totalHits.getText());
			r.put("total", totalHits.getText());

			OMElement citations = rtrn.getFirstChildWithName(qnCitations);
			if (citations != null) {
				Iterator<OMElement> itr = citations.getChildElements();
				while (itr.hasNext()) {
					OMElement citation = itr.next();

					JSONObject article = new JSONObject();
					article.put("pmid", citation.getFirstChildWithName(qnPMID).getText());
					article.put("title", escapeMarkup(citation.getFirstChildWithName(qnTitle).getText()));
					article.put("date", citation.getFirstChildWithName(qnDate).getText());
					article.put("journal", citation.getFirstChildWithName(qnJournal).getText());
					article.put("snippet", citation.getFirstChildWithName(qnSnippet).getText());

					// authors
					JSONArray authors = new JSONArray();
					Iterator<OMElement> itrAuthors = citation.getFirstChildWithName(qnAuthors).getChildElements();
					while (itrAuthors.hasNext()) {
						authors.add(itrAuthors.next().getText());
					}
					article.put("authors", authors);

					articles.add(article);
				}
			}
			r.put("result", articles);
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
		return r;
	}

	/**
	 * Sends a {@code getFacets} request and converts the response into a tree of
	 * facet nodes.
	 *
	 * <p>
	 * Facets listed in {@link #HIDDEN_FACETS} and facets whose frequency is
	 * {@code "0"} are left out. For each remaining facet the first three values
	 * with a non-zero frequency become direct children; any further ones are
	 * grouped under a {@code <facet>_more} node that ends with a
	 * {@code <facet>_less} leaf.
	 * </p>
	 *
	 * @param query query string, see {@link #getQueryNodes(String)} for the syntax
	 * @return an object whose {@code "result"} is the array of facet nodes; empty
	 *         if the call raised an {@link AxisFault}
	 */
	public JSONObject getFacets(String query) {
		JSONObject r = new JSONObject();
		JSONArray facetList = new JSONArray();

		OMElement req = omFactory.createOMElement("getFacets", nsWeb);
		req.addChild(getKLEIOKey());
		req.addChild(getQueryNodes(query));

		QName qnFacetName = new QName("", "facetName");
		QName qnFacetFreq = new QName("", "facetFreq");
		QName qnFacetValues = new QName("", "facetValues");

		String labelOpen = "<span style=\"color: #CC6600; margin: 0; padding: 0 0 2px; font-weight: bold;\">";
		String countOpen = "</span><span style=\"color: #888;\"> (";

		try {
			OMElement res = client.sendReceive(req);
			Iterator<OMElement> itr = res.getChildrenWithNamespaceURI(nsWeb.getNamespaceURI());

			while (itr.hasNext()) {
				OMElement e = itr.next(); // <return>
				OMElement facetNameElement = e.getFirstChildWithName(qnFacetName);
				String facetName = facetNameElement.getText();
				String facetFreq = e.getFirstChildWithName(qnFacetFreq).getText();

				if (HIDDEN_FACETS.contains(facetName) || facetFreq.equals("0")) {
					continue;
				}

				JSONObject facet = new JSONObject();
				// The two renamed facets carry a space before the closing span;
				// kept as-is so the generated markup does not change.
				if (facetName.equals("DIAG_PROC")) {
					facet.put("text", labelOpen + "Diagnostic Procedure" + countOpen + facetFreq + "+) </span>");
				}
				else if (facetName.equals("THERAPEUTIC_PROC")) {
					facet.put("text", labelOpen + "Therapeutic Procedure" + countOpen + facetFreq + "+) </span>");
				}
				else {
					facet.put("text", labelOpen + capitalize(facetName) + countOpen + facetFreq + "+)</span>");
				}
				facet.put("id", facetName);
				facet.put("leaf", false);
				facet.put("expanded", true);
				facet.put("renderstep", "1");

				if (Integer.parseInt(facetFreq) > 0) {
					Iterator<OMElement> itrValues = e.getFirstChildWithName(qnFacetValues).getChildElements();
					JSONArray jsonValues = new JSONArray();
					int shown = 0;

					while (itrValues.hasNext()) {
						OMElement v = itrValues.next(); // <facetValue>
						if (isZeroFrequency(v)) {
							continue;
						}
						shown++;

						if (shown < 4) {
							jsonValues.add(buildFacetValue(facetName, facetNameElement, v));
							continue;
						}

						// Fourth non-zero value: it and everything after it go
						// under the collapsible "more" node.
						String moreId = facetName + "_more";
						JSONArray moreChildren = new JSONArray();
						moreChildren.add(buildFacetValue(moreId, facetNameElement, v));
						while (itrValues.hasNext()) {
							OMElement extra = itrValues.next();
							// Same zero-frequency filter as for the first values.
							if (!isZeroFrequency(extra)) {
								moreChildren.add(buildFacetValue(moreId, facetNameElement, extra));
							}
						}

						JSONObject lessValue = new JSONObject();
						lessValue.put("parentID", facetName);
						lessValue.put("id", facetName + "_less");
						lessValue.put("leaf", true);
						lessValue.put("text", "<b>less</b>");
						lessValue.put("renderstep", "3");
						moreChildren.add(lessValue);

						JSONObject moreValue = new JSONObject();
						moreValue.put("parentID", facetName);
						moreValue.put("id", moreId);
						moreValue.put("leaf", false);
						moreValue.put("text", "<b>more</b>");
						moreValue.put("renderstep", "3");
						moreValue.put("children", moreChildren);
						jsonValues.add(moreValue);
					}

					// Decide on what was actually collected rather than on the
					// last element read, which may be absent or zero-frequency.
					if (!jsonValues.isEmpty()) {
						facet.put("children", jsonValues);
					}
				}

				facetList.add(facet);
			}
			// Always report a result array, even when no facet was returned.
			r.put("result", facetList);
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
		return r;
	}

	/** Tells whether a facet value element reports a frequency of {@code "0"}. */
	private static boolean isZeroFrequency(OMElement value) {
		return value.getFirstChildWithName(QN_FREQ).getText().equals("0");
	}

	/**
	 * Builds the leaf node for a single facet value.
	 *
	 * @param parentId value of the node's {@code parentID}
	 * @param facetNameElement the facet's {@code facetName} element
	 * @param value the facet value element (children {@code name} and {@code freq})
	 * @return the JSON node
	 */
	private JSONObject buildFacetValue(String parentId, OMElement facetNameElement, OMElement value) {
		String name = value.getFirstChildWithName(QN_NAME).getText();
		String freq = value.getFirstChildWithName(QN_FREQ).getText();

		JSONObject node = new JSONObject();
		node.put("parentID", parentId);
		// NOTE(review): the id is built from the facetName *element* (its
		// toString()), not from its text. Left untouched because consumers of
		// the id are not visible here - confirm whether getText() was meant.
		node.put("id", name + "##" + facetNameElement);
		node.put("leaf", true);
		node.put("text", capitalize(name) + "<span style=\"color: #888;\"> (" + freq + "+)</span>");
		node.put("checked", keepState(name, facetNameElement.getText()));
		node.put("renderstep", "2");
		return node;
	}

	/**
	 * Tells whether a facet value was selected in the most recently parsed
	 * query.
	 *
	 * @param text facet value
	 * @param facet facet name; the free-text bucket ({@code "content"} /
	 *            {@code "CONTENT"}) never matches
	 * @return {@code true} if {@code text} is one of the values recorded for
	 *         {@code facet}
	 */
	public boolean keepState(String text, String facet) {
		if (facet.equals("CONTENT") || facet.equals("content")) {
			return false;
		}
		String selected = hash.get(facet);
		if (selected == null) {
			return false;
		}
		String[] values = selected.split(";;");
		for (int i = 0; i < values.length; i++) {
			if (text.equals(values[i])) {
				return true;
			}
		}
		return false;
	}

	/** Records {@code value} under {@code key}, joining repeated values with {@code ";;"}. */
	private void addSelection(String key, String value) {
		String existing = hash.get(key);
		hash.put(key, existing == null ? value : existing + ";;" + value);
	}

	/**
	 * Turns {@code node} into a leaf: a {@code web:facetNode} with
	 * {@code <facet>} and {@code <value>} children when {@code facet} is given,
	 * otherwise a {@code web:stringNode} with a {@code <value>} child only.
	 */
	private void fillLeafNode(OMElement node, String facet, String value) {
		if (facet == null) {
			node.addAttribute("type", "web:stringNode", nsXsi);
		}
		else {
			node.addAttribute("type", "web:facetNode", nsXsi);
			OMElement facetElement = omFactory.createOMElement("facet", null);
			facetElement.setText(facet);
			node.addChild(facetElement);
		}
		OMElement valueElement = omFactory.createOMElement("value", null);
		valueElement.setText(value);
		node.addChild(valueElement);
	}

	/**
	 * Parses a query string and builds the {@code <query>} request element.
	 *
	 * <p>
	 * The query is split on {@code " AND "} and {@code " OR "}. A term of the
	 * form {@code FACET:value}, where {@code FACET} is exactly one of the names
	 * in {@link #facets}, is recorded as a facet selection; for any other term
	 * the text before the first {@code ':'} is recorded as free text under
	 * {@code "content"}. Several free-text terms are combined in an
	 * {@code andNode}, several values of one facet in an {@code orNode}, and
	 * different keys in an outer {@code andNode}.
	 * </p>
	 *
	 * <p>
	 * The selections of any previous call are discarded first, so that each
	 * request reflects only its own query.
	 * </p>
	 *
	 * @param query query string
	 * @return the {@code <query>} element
	 */
	private OMElement getQueryNodes(String query) {
		System.out.print("KLEIO query" + query);

		hash.clear();
		String[] terms = query.split(" AND | OR ");
		for (int i = 0; i < terms.length; i++) {
			String[] row = terms[i].split(":");
			String head = row.length > 0 ? row[0].trim() : "";
			if (row.length > 1 && facetNames.contains(head)) {
				addSelection(head, row[1].trim());
			}
			else if (head.length() > 0) {
				// Not a known facet, or a facet name without a value: free text.
				addSelection("content", head);
			}
		}
		if (hash.isEmpty()) {
			// Nothing usable in the query; still emit a well-formed string node.
			hash.put("content", "");
		}

		OMElement e = omFactory.createOMElement("query", null);
		OMElement eall = omFactory.createOMElement("node", null);
		boolean compound = hash.size() > 1;
		if (compound) {
			eall.addAttribute("type", "web:andNode", nsXsi);
		}

		Iterator<Map.Entry<String, String>> it = hash.entrySet().iterator();
		while (it.hasNext()) {
			Map.Entry<String, String> pairs = it.next();
			String key = pairs.getKey();
			boolean isContent = key.equals("content") || key.equals("CONTENT");
			String leafFacet = isContent ? null : key;
			String[] values = pairs.getValue().split(";;");

			OMElement etemp = omFactory.createOMElement("node", null);
			if (values.length > 1) {
				etemp.addAttribute("type", isContent ? "web:andNode" : "web:orNode", nsXsi);
				for (int i = 0; i < values.length; i++) {
					OMElement eNode = omFactory.createOMElement("node", null);
					fillLeafNode(eNode, leafFacet, values[i]);
					etemp.addChild(eNode);
				}
			}
			else {
				fillLeafNode(etemp, leafFacet, pairs.getValue());
			}

			if (compound) {
				eall.addChild(etemp);
			}
			else {
				eall = etemp;
			}
		}
		e.addChild(eall);
		return e;
	}

	/**
	 * Sends a {@code getDocument} request for a single document.
	 *
	 * @param pubmedID value sent as {@code documentId}
	 * @return an object with authors, abstract, title (escaped), journal, pmid
	 *         and mesh; empty if the call raised an {@link AxisFault}
	 */
	public JSONObject getDocument(String pubmedID) {
		JSONObject r = new JSONObject();
		OMElement req = omFactory.createOMElement("getDocument", nsWeb);

		QName qnAuthors = new QName("", "authors");
		QName qnAbstract = new QName("", "abstract");
		QName qnTitle = new QName("", "title");
		QName qnJournal = new QName("", "journalCite");
		QName qnPMID = new QName("", "pmid");
		QName qnMesh = new QName("", "meshHeadings");

		req.addChild(getKLEIOKey());
		OMElement documentID = omFactory.createOMElement("documentId", null);
		documentID.setText(pubmedID);
		req.addChild(documentID);

		try {
			OMElement res = client.sendReceive(req);
			OMElement article = res.getFirstElement();

			// authors
			JSONArray authors = new JSONArray();
			Iterator<OMElement> itrAuthors = article.getFirstChildWithName(qnAuthors).getChildElements();
			while (itrAuthors.hasNext()) {
				authors.add(itrAuthors.next().getText());
			}
			r.put("authors", authors);
			r.put("abstract", article.getFirstChildWithName(qnAbstract).getText());
			r.put("title", escapeMarkup(article.getFirstChildWithName(qnTitle).getText()));
			r.put("journal", article.getFirstChildWithName(qnJournal).getText());
			r.put("pmid", article.getFirstChildWithName(qnPMID).getText());

			// mesh
			JSONArray mesh = new JSONArray();
			Iterator<OMElement> itrMesh = article.getFirstChildWithName(qnMesh).getChildElements();
			while (itrMesh.hasNext()) {
				mesh.add(itrMesh.next().getText());
			}
			r.put("mesh", mesh);
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
		return r;
	}

	/**
	 * Sends a {@code getFacetNames} request.
	 *
	 * @return the text of every {@code getFacetNamesResponse} child of the
	 *         response; empty if the call raised an {@link AxisFault}
	 */
	public JSONArray getFacetNames() {
		JSONArray r = new JSONArray();
		OMElement req = omFactory.createOMElement("getFacetNames", nsWeb);
		req.addChild(getKLEIOKey());

		try {
			OMElement res = client.sendReceive(req);
			Iterator<OMElement> itr = res.getChildrenWithLocalName("getFacetNamesResponse");
			while (itr.hasNext()) {
				r.add(itr.next().getText());
			}
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
		return r;
	}

	/**
	 * Sends a {@code getNamedEntities} request for a single document.
	 *
	 * @param pubmedID value sent as {@code documentId}
	 * @return an object whose {@code "result"} is an array of entities (begin,
	 *         end, expandedForm, location, optional namedEntity, normalisedForm,
	 *         shortForm, surfaceForm, externalReferences); empty if the call
	 *         raised an {@link AxisFault}
	 */
	public JSONObject getNamedEntities(String pubmedID) {
		JSONObject r = new JSONObject();
		JSONArray results = new JSONArray();
		OMElement req = omFactory.createOMElement("getNamedEntities", nsWeb);

		QName qnexternalReferences = new QName("", "externalReferences");
		QName qnid = new QName("", "id");
		QName qnnamespace = new QName("", "namespace");
		QName qnbegin = new QName("", "begin");
		QName qnend = new QName("", "end");
		QName qnexpandedForm = new QName("", "expandedForm");
		QName qnlocation = new QName("", "location");
		QName qnnamedEntity = new QName("", "namedEntity");
		QName qnnormalisedForm = new QName("", "normalisedForm");
		QName qnshortForm = new QName("", "shortForm");
		QName qnsurfaceForm = new QName("", "surfaceForm");

		req.addChild(getKLEIOKey());
		OMElement documentID = omFactory.createOMElement("documentId", null);
		documentID.setText(pubmedID);
		req.addChild(documentID);

		try {
			OMElement res = client.sendReceive(req);
			Iterator<OMElement> itr = res.getChildrenWithLocalName("getNamedEntitiesResponse");

			while (itr.hasNext()) {
				OMElement entities = itr.next();

				JSONObject entity = new JSONObject();
				entity.put("begin", entities.getFirstChildWithName(qnbegin).getText());
				entity.put("end", entities.getFirstChildWithName(qnend).getText());
				entity.put("expandedForm", entities.getFirstChildWithName(qnexpandedForm).getText());
				entity.put("location", entities.getFirstChildWithName(qnlocation).getText());
				if (entities.getFirstChildWithName(qnnamedEntity) != null) {
					entity.put("namedEntity", entities.getFirstChildWithName(qnnamedEntity).getText());
				}
				entity.put("normalisedForm", entities.getFirstChildWithName(qnnormalisedForm).getText());
				entity.put("shortForm", entities.getFirstChildWithName(qnshortForm).getText());
				entity.put("surfaceForm", entities.getFirstChildWithName(qnsurfaceForm).getText());

				Iterator<OMElement> itrreferences = entities.getFirstChildWithName(qnexternalReferences).getChildElements();
				JSONObject extreferences = new JSONObject();
				while (itrreferences.hasNext()) {
					OMElement references = itrreferences.next();

					JSONObject reference = new JSONObject();
					reference.put("namespace", references.getFirstChildWithName(qnnamespace).getText());
					reference.put("id", references.getFirstChildWithName(qnid).getText());
					// NOTE(review): the same "reference" key is written on every
					// pass, so only the last external reference is kept. Left
					// as-is because changing it would alter the JSON shape
					// callers receive - confirm whether an array was intended.
					extreferences.put("reference", reference);
				}
				entity.put("externalReferences", extreferences);

				results.add(entity);
			}
			r.put("result", results);
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
		return r;
	}

	/**
	 * Releases the service client and its transport. An {@link AxisFault} raised
	 * while doing so is printed and otherwise ignored.
	 */
	public void clean() {
		try {
			client.cleanup();
			client.cleanupTransport();
		}
		catch (AxisFault e) {
			e.printStackTrace();
		}
	}
}