/*******************************************************************************
 * Copyright 2014 Virginia Polytechnic Institute and State University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package edu.vt.vbi.patric.common.xmlHandler;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that converts each {@code DocumentSummary} element of an XML
 * document into one {@link JSONObject} and collects the objects in a
 * {@link JSONArray}.
 *
 * <p>
 * For every {@code DocumentSummary} the handler emits the keys {@code summary},
 * {@code title}, {@code Accession}, {@code TaxName}, {@code SampleCount},
 * {@code ProteinCount}, {@code PeptideCount}, {@code SpectraCount} and
 * {@code link_data_file}. {@code pubmed_id} is added when a {@code pubmedids}
 * element contains an {@code int} child with text, and any other tracked
 * element (e.g. {@code entryType}) is stored under its own element name.
 * Element names are matched case-insensitively throughout.
 * </p>
 *
 * <p>
 * Only the first {@code Title} and the first {@code Accession} element that
 * carry text inside a {@code DocumentSummary} are captured; later elements with
 * the same name are ignored.
 * </p>
 */
@SuppressWarnings("unchecked")
public class PeptidomeHandler extends DefaultHandler {

	/** Element names whose character data is of interest to this handler. */
	private static final String[] TRACKED_ELEMENTS = { "accession", "entryType", "title", "summary", "int", "taxname",
			"proteincount", "peptidecount", "spectracount", "samplecount" };

	/** Parsed result; created in {@link #startDocument()}. */
	private JSONArray list = null;

	/** Summary currently being built, or {@code null} outside a DocumentSummary. */
	private JSONObject item = null;

	/** Name of the tracked element being read, or "" when none is open. */
	private String currentElement = "";

	/** Text collected for the currently open generic or {@code int} element. */
	private final StringBuilder elementText = new StringBuilder();

	private StringBuilder sbSummary = null;

	private StringBuilder sbTitle = null;

	private StringBuilder sbAccession = null;

	private StringBuilder sbTaxName = null;

	private StringBuilder sbSampleCount = null;

	private StringBuilder sbProteinCount = null;

	private StringBuilder sbPeptideCount = null;

	private StringBuilder sbSpectraCount = null;

	/** True while inside {@code pubmedids} and no id has been captured yet. */
	private boolean isReadingPubMed = false;

	/** True until the first non-empty Title of the current summary is closed. */
	private boolean isReadingTitle = false;

	/** True until the first non-empty Accession of the current summary is closed. */
	private boolean isReadingAccession = true;

	/**
	 * Returns the summaries parsed so far.
	 *
	 * @return the array of parsed summaries, or {@code null} if
	 *         {@link #startDocument()} has not been called yet
	 */
	public JSONArray getParsedJSON() {
		return list;
	}

	/** Starts a fresh result list for the document about to be parsed. */
	@Override
	public void startDocument() throws SAXException {
		this.list = new JSONArray();
	}

	/**
	 * Opens a new summary on {@code DocumentSummary} and records which tracked
	 * element, if any, subsequent character data belongs to.
	 */
	@Override
	public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {

		if (qName.equalsIgnoreCase("DocumentSummary")) {
			item = new JSONObject();
			sbSummary = new StringBuilder();
			sbTitle = new StringBuilder();
			sbAccession = new StringBuilder();
			sbTaxName = new StringBuilder();
			sbSampleCount = new StringBuilder();
			sbProteinCount = new StringBuilder();
			sbPeptideCount = new StringBuilder();
			sbSpectraCount = new StringBuilder();
			isReadingTitle = true;
			isReadingAccession = true;
		}

		if (isTrackedElement(qName)) {
			currentElement = qName;
			// Each tracked element starts with an empty text accumulator.
			elementText.setLength(0);
		}
		else {
			currentElement = "";
			if (qName.equalsIgnoreCase("pubmedids")) {
				isReadingPubMed = true;
			}
		}
	}

	/**
	 * Finalises the summary on {@code DocumentSummary}, and otherwise closes the
	 * "first occurrence only" windows for Title, Accession and the PubMed id.
	 */
	@Override
	public void endElement(String namespaceURI, String localName, String qName) throws SAXException {

		if (qName.equalsIgnoreCase("DocumentSummary")) {
			item.put("summary", sbSummary.toString());
			item.put("title", sbTitle.toString());
			item.put("Accession", sbAccession.toString());
			item.put("TaxName", sbTaxName.toString());
			item.put("SampleCount", sbSampleCount.toString());
			item.put("ProteinCount", sbProteinCount.toString());
			item.put("PeptideCount", sbPeptideCount.toString());
			item.put("SpectraCount", sbSpectraCount.toString());
			// link
			item.put("link_data_file", "ftp://ftp.ncbi.nih.gov/pub/peptidome/studies/PSEnnn/" + item.get("Accession") + "/");

			list.add(item);

			this.item = null;
			this.sbSummary = null;
			this.sbTitle = null;
			this.sbAccession = null;
			this.sbTaxName = null;
			this.sbSampleCount = null;
			this.sbProteinCount = null;
			this.sbPeptideCount = null;
			this.sbSpectraCount = null;
		}
		else if (qName.equalsIgnoreCase("pubmedids")) {
			isReadingPubMed = false;
		}
		else if (item != null) {
			// The capture flags are cleared here rather than in characters(),
			// because a parser may deliver one text node in several chunks.
			if (qName.equalsIgnoreCase("Title")) {
				if (isReadingTitle && sbTitle.length() > 0) {
					isReadingTitle = false;
				}
			}
			else if (qName.equalsIgnoreCase("Accession")) {
				if (isReadingAccession && sbAccession.length() > 0) {
					isReadingAccession = false;
				}
			}
			else if (qName.equalsIgnoreCase("int")) {
				if (isReadingPubMed && elementText.length() > 0) {
					isReadingPubMed = false;
				}
			}
		}

		// Text that follows a closing tag (typically indentation) must not be
		// attributed to the element that has just been closed.
		currentElement = "";
	}

	/**
	 * Appends character data to the buffer of the tracked element that is
	 * currently open. Data outside a {@code DocumentSummary} or outside a tracked
	 * element is ignored.
	 */
	@Override
	public void characters(char[] ch, int start, int length) throws SAXException {

		if (item == null || currentElement.length() == 0) {
			return;
		}

		String text = new String(ch, start, length);

		if (currentElement.equalsIgnoreCase("Summary")) {
			sbSummary.append(text);
		}
		else if (currentElement.equalsIgnoreCase("Title")) {
			// Only the first Title is kept; later ones are skipped.
			if (isReadingTitle) {
				sbTitle.append(text);
			}
		}
		else if (currentElement.equalsIgnoreCase("Accession")) {
			// Only the first Accession is kept; later ones are skipped.
			if (isReadingAccession) {
				sbAccession.append(text);
			}
		}
		else if (currentElement.equalsIgnoreCase("TaxName")) {
			sbTaxName.append(text);
		}
		else if (currentElement.equalsIgnoreCase("SampleCount")) {
			sbSampleCount.append(text);
		}
		else if (currentElement.equalsIgnoreCase("ProteinCount")) {
			sbProteinCount.append(text);
		}
		else if (currentElement.equalsIgnoreCase("PeptideCount")) {
			sbPeptideCount.append(text);
		}
		else if (currentElement.equalsIgnoreCase("SpectraCount")) {
			sbSpectraCount.append(text);
		}
		else if (currentElement.equalsIgnoreCase("int")) {
			// int elements are only meaningful as the first id inside pubmedids.
			if (isReadingPubMed) {
				elementText.append(text);
				item.put("pubmed_id", elementText.toString());
			}
		}
		else {
			// Remaining tracked elements (e.g. entryType) are stored under their
			// own element name; the value grows as further chunks arrive.
			elementText.append(text);
			item.put(currentElement, elementText.toString());
		}
	}

	/** Tells whether {@code qName} names an element whose text is collected. */
	private static boolean isTrackedElement(String qName) {
		for (int i = 0; i < TRACKED_ELEMENTS.length; i++) {
			if (TRACKED_ELEMENTS[i].equalsIgnoreCase(qName)) {
				return true;
			}
		}
		return false;
	}
}