/*******************************************************************************
 * Copyright 2014 Virginia Polytechnic Institute and State University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package edu.vt.vbi.patric.common.xmlHandler;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that converts a stream of {@code <DocSum>} records made of
 * (possibly nested) {@code <Item Name="...">} elements into a {@link JSONArray}
 * of {@link JSONObject}s, one per {@code DocSum}.
 * <p>
 * NOTE(review): the element and attribute names look like NCBI ESummary output
 * for PubMed; confirm against the caller that feeds this handler.
 * <p>
 * Each resulting object contains:
 * <ul>
 * <li>{@code Title} - text of the {@code Title} item(s), untrimmed ("" if absent)</li>
 * <li>{@code Source} - non-blank text of the {@code Source} item(s) ("" if absent)</li>
 * <li>{@code abbrAuthorList} / {@code fullAuthorList} - formatted author names,
 * or {@code "N/A"} when the record has no authors</li>
 * <li>{@code pubmed_id} - the first non-blank {@code pubmed} or {@code pmid} item
 * found directly inside an {@code ArticleIds} item</li>
 * <li>{@code PubDate}, {@code FullJournalName} (and {@code pmid} when it occurs
 * outside {@code ArticleIds}) - trimmed item text, only when the item had text</li>
 * </ul>
 * <p>
 * Text is buffered per item and committed when the item closes, because a SAX
 * parser is allowed to deliver a single text node through several
 * {@link #characters(char[], int, int)} calls.
 * <p>
 * Instances hold per-parse state and are therefore not safe for concurrent use.
 */
@SuppressWarnings("unchecked")
public class PubMedHandler extends DefaultHandler {

	private static final String DOC_SUM = "DocSum";

	private static final String ITEM = "Item";

	private static final String NOT_AVAILABLE = "N/A";

	/** All records parsed so far; created in {@link #startDocument()}. */
	private JSONArray articles = null;

	/** Record currently being built, or {@code null} when outside a {@code DocSum}. */
	private JSONObject article = null;

	private List<String> authors = new ArrayList<String>();

	private StringBuilder title = new StringBuilder();

	private StringBuilder source = new StringBuilder();

	/** {@code Name} attributes of the currently open {@code Item} elements, innermost first. */
	private final Deque<String> openItems = new ArrayDeque<String>();

	/** Text collected for the item currently being captured. */
	private final StringBuilder text = new StringBuilder();

	/** True only while inside a leaf item whose text we want to keep. */
	private boolean capturing = false;

	/**
	 * Returns the records parsed so far.
	 *
	 * @return the array of parsed records, or {@code null} if no document has been started yet
	 */
	public JSONArray getParsedJSON() {
		return articles;
	}

	@Override
	public void startDocument() throws SAXException {
		this.articles = new JSONArray();
		this.article = null;
		resetRecordState();
	}

	@Override
	public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
		// Any element boundary ends the text run of whatever was being captured; text that
		// precedes a nested element belongs to a container item and is never of interest.
		stopCapturing();

		if (DOC_SUM.equalsIgnoreCase(qName)) {
			this.article = new JSONObject();
			resetRecordState();
		}
		else if (ITEM.equalsIgnoreCase(qName)) {
			String name = atts.getValue("Name");
			if (name == null) {
				// ArrayDeque rejects null; an unnamed item is tracked only to keep nesting balanced.
				name = "";
			}
			openItems.push(name);

			if (article != null) {
				if ("AuthorList".equals(name)) {
					authors = new ArrayList<String>();
				}
				capturing = isCapturedField(name);
			}
		}
	}

	@Override
	public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
		if (ITEM.equalsIgnoreCase(qName)) {
			String name = openItems.isEmpty() ? "" : openItems.pop();
			if (capturing && article != null) {
				// After the pop, the head of the stack (if any) is the enclosing item.
				commitItem(name, openItems.peek(), text.toString());
			}
		}
		else if (DOC_SUM.equalsIgnoreCase(qName) && article != null) {
			finishArticle();
		}
		// Text that follows a closing tag (e.g. indentation) must not be attributed to the closed item.
		stopCapturing();
	}

	@Override
	public void characters(char[] ch, int start, int length) throws SAXException {
		if (capturing) {
			text.append(ch, start, length);
		}
	}

	/** Tells whether the text of an item with this name is stored in the record. */
	private static boolean isCapturedField(String name) {
		return "Title".equals(name) || "Author".equals(name) || "PubDate".equals(name)
				|| "FullJournalName".equals(name) || "Source".equals(name) || "pubmed".equals(name)
				|| "pmid".equals(name);
	}

	/**
	 * Stores the complete text of a closed leaf item in the current record.
	 *
	 * @param name the item's {@code Name} attribute
	 * @param parentName the enclosing item's {@code Name}, or {@code null} if the item is not nested
	 * @param value the raw, untrimmed text of the item
	 */
	private void commitItem(String name, String parentName, String value) {
		boolean blank = value.trim().length() == 0;
		boolean insideArticleIds = "ArticleIds".equals(parentName);

		if ("Title".equals(name)) {
			title.append(value);
		}
		else if ("Source".equals(name)) {
			if (!blank) {
				source.append(value);
			}
		}
		else if ("Author".equals(name)) {
			if (!blank) {
				authors.add(value);
			}
		}
		else if (insideArticleIds && ("pubmed".equals(name) || "pmid".equals(name))) {
			// The first identifier wins, so a record carrying both names does not get them glued together.
			if (!blank && article.get("pubmed_id") == null) {
				article.put("pubmed_id", value);
			}
		}
		else if ("pubmed".equals(name)) {
			// "pubmed" items outside ArticleIds are not identifiers; ignore them.
		}
		else if (value.length() > 0) {
			// PubDate, FullJournalName, and pmid outside ArticleIds: stored under their own name.
			article.put(name, value.trim());
		}
	}

	/** Adds the derived author/title/source fields and appends the finished record to the result. */
	private void finishArticle() {
		article.put("abbrAuthorList", abbreviateAuthors(authors));
		article.put("fullAuthorList", joinAuthors(authors));
		article.put("Title", title.toString());
		article.put("Source", source.toString());

		this.articles.add(this.article);
		this.article = null;
		resetRecordState();
	}

	/** Short form: the single name, "A and B", "A et al" for three or more, or "N/A" for none. */
	private static String abbreviateAuthors(List<String> names) {
		switch (names.size()) {
		case 0:
			return NOT_AVAILABLE;
		case 1:
			return names.get(0);
		case 2:
			return names.get(0) + " and " + names.get(1);
		default:
			return names.get(0) + " et al";
		}
	}

	/** Long form: the single name, "A and B", a comma-separated list for three or more, or "N/A" for none. */
	private static String joinAuthors(List<String> names) {
		if (names.isEmpty()) {
			return NOT_AVAILABLE;
		}
		if (names.size() == 2) {
			return names.get(0) + " and " + names.get(1);
		}
		StringBuilder joined = new StringBuilder(names.get(0));
		for (int i = 1; i < names.size(); i++) {
			joined.append(", ").append(names.get(i));
		}
		return joined.toString();
	}

	/** Clears everything that belongs to a single record so state never leaks between records. */
	private void resetRecordState() {
		authors = new ArrayList<String>();
		title = new StringBuilder();
		source = new StringBuilder();
		openItems.clear();
		stopCapturing();
	}

	/** Stops collecting text and discards whatever was buffered. */
	private void stopCapturing() {
		capturing = false;
		text.setLength(0);
	}
}