/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.IO.genomeListLoader; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import edu.yu.einstein.genplay.dataStructure.genome.Clade; import edu.yu.einstein.genplay.dataStructure.genome.Genome; /** * This class makes the clade list from xml files * @author Nicolas Fourel */ public class AssemblyListLoader { private final static String XML_ASSEMBLIES_ROOT_PATH = "edu/yu/einstein/genplay/resource/assemblies/"; private final List<String> xmlAssembliesPath; private final Map<String, Clade> cladeList; private GenomeListLoader genomeParser; /** * Constructor of {@link AssemblyListLoader} * @throws ParserConfigurationException * @throws SAXException * @throws IOException */ public AssemblyListLoader () throws ParserConfigurationException, SAXException, IOException { xmlAssembliesPath = new ArrayList<String>(); cladeList = new HashMap<String, Clade>(); initGenomePath (); computeList (); } /** * Add a clade to the clade list * If the clade is already existing, this method will try to add the genome. * @param clade */ protected void addClade (Clade clade) { if (!cladeList.containsKey(clade.getName())){ cladeList.put(clade.getName(), clade); } else { for (Genome genome: clade.getGenomeList().values()){ cladeList.get(clade.getName()).addGenome(genome); } } } /** * This method read all XML files defined to build a list of clade. * @throws ParserConfigurationException * @throws SAXException * @throws IOException */ private void computeList () throws ParserConfigurationException, SAXException, IOException { SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); for (String currentGenomePath: xmlAssembliesPath) { InputStream xml = getClass().getClassLoader().getResourceAsStream(currentGenomePath); genomeParser = new GenomeListLoader(); parser.parse(xml, genomeParser); genomeParser.computeClade(); addClade(genomeParser.getClade()); xml.close(); } } /** * @return the clade list */ public Map<String, Clade> getCladeList() { return cladeList; } /** * XML path files are set here. */ private void initGenomePath () { xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("deuterostome_c. intestinalis_2002_12_JGI_1.0_ci1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("deuterostome_c. intestinalis_2005_03_JGI_2.1_ci2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("deuterostome_lancelet_2006_03_JGI_1.0_braFlo1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_a. gambiae_2003_02_IAGEC_MOZ2_anoGam1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_a. mellifera_2004_07_Baylor_1.2_apiMel1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_a. mellifera_2005_01_Baylor_2.0_apiMel2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. erecta_2005_08_Agencourt_prelim_droEre1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. melanogaster_2003_01_BDGP_R3_dm1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. melanogaster_2004_04_BDGP_R4_dm2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. melanogaster_2006_04_BDGP_R5_dm3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. mojavensis_2005_08_Agencourt_prelim_droMoj2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. pseudoobscura_2003_08_Baylor_freeze1_dp2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. pseudoobscura_2004_11_FlyBase_1.03_dp3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. simulans_2005_04_WUGSC_mosaic_1.0_droSim1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. yakuba_2004_04_WUGSC_1.0_droYak1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("insect_d. yakuba_2005_11_WUGSC_7.1_droYak2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_chimp_2003_11_CGSC_1.1_panTro1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_chimp_2006_03_CGSC_2.1_panTro2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_dog_2004_07_Broad_canFarm1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_dog_2005_05_Broad_canFarm2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_elephant_2009_07_Broad_loxAfr3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_guinea pig_2008_02_Broad_cavPor3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_horse_2007_01_Broad_equCab1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_horse_2007_09_Broad_equCab2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2003_07_hg16.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2003_07_NCBI34.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2004_05_hg17.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2004_05_NCBI35.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2006_03_hg18.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2006_03_NCBI36.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2009_02_GRCh37.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2009_02_hg19.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2013_12_GRCh38.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_human_2013_12_hg38.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_mouse_2005_08_NCBI35_mm7.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_mouse_2006_02_NCBI36_mm8.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_mouse_2007_07_NCBI37_mm9.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_mouse_2011_12_GRCm38_mm10.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_opossum_2006_01_Broad_monDom4.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_opossum_2006_10_Broad_monDom5.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_orangutan_2007_07_WUGSC_2.0.2_ponAbe2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_pig_2009_11_SGSC_Sscrofa9.2_susScr2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_rabbit_2009_04_Broad_oryCun2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_rat_2003_06_Baylor_3.1_rn3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_rat_2004_11_Baylor_3.4_rn4.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_rat_2012_03_RGSC_5.0_rn5.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("mammal_rhesus_2006_01_MGSC_Merged_1.0_rheMac2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. brenneri_2007_01_WUGSC_4.0_caePb1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. brenneri_2008_02_WUGSC_6.0.1_caePb2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. briggsae_2002_07_WormBase_cb25.agp8_cb1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. briggsae_2007_01_WUGSC_1.0_cb3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. elegans_2004_03_WS120_ce2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. elegans_2007_01_WS170_ce4.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. elegans_2008_05_WS190_ce6.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. japonica_2008_03_WUGSC_3.0.2_caeJap1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. remanei_2006_03_WUGSC_1.0_caeRem2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_c. remanei_2007_05_WUGSC_15.0.1_caeRem3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("nematode_p. pacificus_2007_02_WUGSC_5.0_priPac1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("other_s. cerevisiae_2003_10_SGD_sacCer1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("other_s. cerevisiae_2008_07_SGD_sacCer2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("other_sea hare_2008_09_Broad_2.0_aplCal1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("plant_a. thaliana_2010_11_TAIR10.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("S. pombe_2002_02(Nature_paper).xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("S. pombe_2011_05(GFF).xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_chicken_2004_02_WUGSC_1.0_galGal2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_chicken_2006_05_WUGSC_2.1_galGal3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_fugu_2002_08_JGI_3.0_fr1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_fugu_2004_10_JGI_4.0_fr2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_lizard_2007_02_Broad_anoCar1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_medaka_2005_10_NIG_UT_MEDAKA1_oryLat2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_stickleback_2006_02_Broad_gasAcu1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_tetraodon_2004_02_Genoscope_7_tetNig1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_tetraodon_2007_03_Genoscope_8.0_tetNig2.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_zebra finch_2008_07_WUGSC_3.2.4_taeGut1.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_zebrafish_2005_04_Zv5_danRer3.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_zebrafish_2006_03_Zv6_danRer4.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_zebrafish_2007_07_Zv7_danRer5.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("vertebrate_zebrafish_2010_07_Zv9_danRer7.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus1(9629378)_2010_08.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus2(9629267)_2010_04.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus3(9625875)_2011_03.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus4(139424470)_2010_03.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus4type1(82503188)_2010_03.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus5(155573622)_2012_08.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus6A(224020395)_2010_04.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus6B(9633069)_2010_08.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus7(51874225)_2010_04.xml")); xmlAssembliesPath.add(XML_ASSEMBLIES_ROOT_PATH.concat("virus_herpesvirus_herpesvirus8(139472801)_2010_04.xml")); } }