/*******************************************************************************
* Copyright 2014 Virginia Polytechnic Institute and State University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package edu.vt.vbi.patric.common;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import edu.vt.vbi.patric.dao.DBShared;
import edu.vt.vbi.patric.dao.ResultType;
/**
 * Static helpers that assemble PubMed search strings from a map of request
 * parameters.
 *
 * The parameter map is read with the following keys: {@code context}
 * ({@code taxon}, {@code genome} or {@code feature}), {@code ncbi_taxon_id},
 * {@code genome_info_id}, {@code feature_id}, {@code scope}, {@code keyword}
 * and {@code date}.
 */
public class PubMedHelper {

    /**
     * Builds the leading search term (the "title") for the requested context.
     *
     * <ul>
     * <li>{@code taxon}: the {@code name} of the taxon record whose
     * {@code name_class} is "scientific name", or an empty string when no such
     * record is found.</li>
     * <li>{@code genome}: the organism name when {@code scope} is {@code "o"},
     * otherwise the genome name.</li>
     * <li>{@code feature}: the genome name when {@code scope} is {@code "g"},
     * otherwise a boolean expression combining the organism prefix of the
     * genome name with the identifiers of the feature.</li>
     * </ul>
     *
     * @param key request parameters; must not be {@code null}
     * @return the title, or {@code null} when the context is missing or
     *         unrecognized, or when the looked-up record is not available
     */
    public static String getTitleString(HashMap<String, String> key) {
        DBShared conn_shared = new DBShared();
        String context = key.get("context");

        // Constant-first comparisons keep a missing "context" entry from raising an NPE here.
        if ("taxon".equalsIgnoreCase(context)) {
            return getTaxonTitle(conn_shared, key);
        }
        if ("genome".equalsIgnoreCase(context)) {
            return getGenomeTitle(conn_shared, key);
        }
        if ("feature".equalsIgnoreCase(context)) {
            return getFeatureTitle(key);
        }
        return null;
    }

    /** Returns the scientific name registered for {@code ncbi_taxon_id}, or "" when there is none. */
    private static String getTaxonTitle(DBShared conn_shared, HashMap<String, String> key) {
        ArrayList<ResultType> taxon_names = conn_shared.getTaxonNames(key.get("ncbi_taxon_id"));
        ResultType scientific_name = null;
        if (taxon_names != null) {
            for (ResultType name : taxon_names) {
                // When several records qualify, the last one wins (same as the original loop).
                if (name.containsKey("name_class") && "scientific name".equalsIgnoreCase(name.get("name_class"))) {
                    scientific_name = name;
                }
            }
        }
        return (scientific_name != null) ? scientific_name.get("name") : "";
    }

    /** Returns the organism name (scope "o") or the genome name for {@code genome_info_id}. */
    private static String getGenomeTitle(DBShared conn_shared, HashMap<String, String> key) {
        ResultType names = conn_shared.getNamesFromGenomeInfoId(key.get("genome_info_id"));
        if (names == null) {
            return null;
        }
        if ("o".equals(key.get("scope"))) {
            return names.get("organism_name");
        }
        return names.get("genome_name");
    }

    /** Looks the feature up in Solr and returns either its genome name (scope "g") or a feature-level expression. */
    private static String getFeatureTitle(HashMap<String, String> key) {
        SolrInterface solr = new SolrInterface();
        try {
            solr.setCurrentInstance("GenomicFeature");
        }
        catch (MalformedURLException e) {
            e.printStackTrace();
        }
        JSONArray res = solr.searchSolrRecords("na_feature_id:" + key.get("feature_id"));
        if (res == null || res.isEmpty()) {
            // No such feature: report "no title" instead of failing on an empty record.
            return null;
        }
        JSONObject feature = (JSONObject) res.get(0);

        Object genomeNameValue = feature.get("genome_name");
        if (genomeNameValue == null) {
            return null;
        }
        String genome_name = genomeNameValue.toString();

        if ("g".equals(key.get("scope"))) {
            return genome_name;
        }

        // default, feature level
        String org = getOrganismPrefix(genome_name);

        ArrayList<String> terms = new ArrayList<String>();
        addTerm(terms, feature.get("locus_tag"), false);
        addTerm(terms, feature.get("product"), true);
        addTerm(terms, feature.get("gene"), false);
        addTerm(terms, feature.get("refseq_locus_tag"), false);
        addTerm(terms, feature.get("refseq_protein_id"), false);

        StringBuilder title = new StringBuilder();
        title.append("(\"").append(org.toLowerCase()).append("\")");
        if (!terms.isEmpty()) {
            // Every term is quoted on both sides, so the expression stays balanced
            // no matter which of the optional fields are present.
            title.append(" AND (");
            for (int i = 0; i < terms.size(); i++) {
                if (i > 0) {
                    title.append(" OR ");
                }
                title.append("\"").append(terms.get(i)).append("\"");
            }
            title.append(")");
        }
        return title.toString();
    }

    /**
     * Returns the text before the second space of {@code genome_name}, the text
     * before the only space when there is just one, or the whole name when it
     * contains no space at all.
     */
    private static String getOrganismPrefix(String genome_name) {
        int offset1 = genome_name.indexOf(" ");
        if (offset1 < 0) {
            // Single-word name: substring(0, -1) would throw, so use the name as is.
            return genome_name;
        }
        int offset2 = genome_name.indexOf(" ", offset1 + 1);
        return genome_name.substring(0, (offset2 > 0) ? offset2 : offset1);
    }

    /** Appends {@code value} (optionally lower-cased) to {@code terms}; {@code null} and empty values are skipped. */
    private static void addTerm(ArrayList<String> terms, Object value, boolean lowerCase) {
        if (value == null) {
            return;
        }
        String text = value.toString();
        if (text.length() == 0) {
            return;
        }
        terms.add(lowerCase ? text.toLowerCase() : text);
    }

    /**
     * Builds the URL-encoded PubMed query for the given request parameters.
     *
     * The query starts with {@link #getTitleString(HashMap)}. When
     * {@code keyword} names a category known to {@link #getKeywordHash()}, the
     * category and its related terms are appended as an {@code [ALL]} clause.
     * When {@code date} is {@code w}, {@code m}, {@code y} or {@code f}, a
     * {@code [dp]} restriction is appended (last 7 days, last month, last year,
     * or today through next year respectively).
     *
     * @param key request parameters; must not be {@code null}
     * @return the query, encoded with {@link URLEncoder} as UTF-8
     * @throws NullPointerException when no title can be determined for {@code key}
     */
    public static String getPubmedQueryString(HashMap<String, String> key) throws NullPointerException {
        String title = getTitleString(key);
        if (title == null || title.equals("")) {
            throw new NullPointerException("title is not defined");
        }

        StringBuilder pubmedquery = new StringBuilder(title);

        // keyword configuration
        String qKeyword = key.get("keyword");
        if (qKeyword != null && !qKeyword.equals("none")) {
            Vector<String> querykeyv = PubMedHelper.getKeywordHash().get(qKeyword);
            if (querykeyv != null) {
                pubmedquery.append(" AND (\"").append(qKeyword).append("\"[ALL]");
                for (String related : querykeyv) {
                    pubmedquery.append(" or \"").append(related).append("\"[ALL]");
                }
                pubmedquery.append(")");
            }
        }

        // date
        String qDate = key.get("date");
        if ("w".equals(qDate)) {
            pubmedquery.append(" \"last 7 days\"[dp]");
        }
        else if ("m".equals(qDate)) {
            pubmedquery.append(" \"last 1 months\"[dp]");
        }
        else if ("y".equals(qDate)) {
            pubmedquery.append(" \"last 1 year\"[dp]");
        }
        else if ("f".equals(qDate)) {
            // Range from today up to next year.
            Date todayDate = new Date();
            String strToday = new SimpleDateFormat("yyyy/MM/dd").format(todayDate);
            int intNextYear = Integer.parseInt(new SimpleDateFormat("yyyy").format(todayDate)) + 1;
            pubmedquery.append(" ").append(strToday).append(":").append(intNextYear).append(" [dp]");
        }

        try {
            return URLEncoder.encode(pubmedquery.toString(), "UTF-8");
        }
        catch (java.io.UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8, so this is not expected to
            // happen; fail loudly rather than hand back an unencoded query.
            throw new IllegalStateException("UTF-8 encoding is not supported", ex);
        }
    }

    /**
     * Returns the keyword categories together with the related search terms
     * that are OR-ed with the category name in
     * {@link #getPubmedQueryString(HashMap)}. Categories without related terms
     * map to an empty vector.
     *
     * @return a newly built table on every call; callers may modify it freely
     */
    public static Hashtable<String, Vector<String>> getKeywordHash() {
        Hashtable<String, Vector<String>> keywordshash = new Hashtable<String, Vector<String>>();

        putKeywords(keywordshash, "Countermeasures",
                "drug", "vaccine", "therapeutics", "diagnostics", "target");
        putKeywords(keywordshash, "Proteomics",
                "mass spectrometry", "2D-gels", "protein-protein interaction");
        putKeywords(keywordshash, "Gene expression",
                "microarray", "transcriptome", "expression profiling", "real time PCR", "immune response",
                "response to infection", "host response", "pathogenesis", "virulence", "disease response");
        putKeywords(keywordshash, "Diagnosis",
                "culture", "microscopy", "haemagglutination", "complement fixation", "ELISA", "EIA",
                "immune double diffusion", "immunoelectrophoresis", "latex agglutination", "western blot",
                "antibody", "Polymerase chain reaction", "PCR", "PCR primer");
        putKeywords(keywordshash, "Disease",
                "symptom", "syndrome", "prognosis");

        // Categories that are searched by their own name only.
        putKeywords(keywordshash, "Pathogenesis");
        putKeywords(keywordshash, "Prevention");
        putKeywords(keywordshash, "Host");
        putKeywords(keywordshash, "Reservoir");
        putKeywords(keywordshash, "Transmission");
        putKeywords(keywordshash, "Genome");
        putKeywords(keywordshash, "Taxonomy");

        putKeywords(keywordshash, "Epidemiology",
                "outbreak", "epidemic");
        putKeywords(keywordshash, "Experiment Data",
                "Microarray", "Expression array", "Gene expression", "Expression profil",
                "Genome variation profil", "RNA profil", "Tiling array", "ArrayCGH", "ChIP-chip", "SAGE",
                "RNA-Seq", "Protein microarray", "Protein array", "Mass spec", "Protein identification",
                "Peptide identification", "2D gel", "Proteomics", "Protein structure",
                "three-dimensional structure", "3D structure", "NMR", "X-ray diffraction");

        return keywordshash;
    }

    /** Registers {@code category} in {@code hash} with the given related terms, in the order supplied. */
    private static void putKeywords(Hashtable<String, Vector<String>> hash, String category, String... related) {
        Vector<String> terms = new Vector<String>();
        for (String term : related) {
            terms.add(term);
        }
        hash.put(category, terms);
    }
}