/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck; import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.logging.Logger; public enum Species { // defined new Species and properties: taxonomy_id, assemblyprefix, stableIDprefix, alias AEDES_AEGYPTI(7159, "", "IGNORE", "aedes,aedesaegypti,aedes_aegypti"), AILUROPODA_MELANOLEUCA(9646, "ailMel", "ENSAME", "panda,giant panda,ailuropoda melanoleuca,ailuropoda_melanoleuca"), ANAS_PLATYRHYNCHOS(8839, "BGI_duck", "ENSAPL","anapla,apla,mallard,anas_platyrhynchos,aplatyrhynchos,duck,anas platyrhynchos"), ANOLIS_CAROLINENSIS(28377, "AnoCar", "ENSACA", "lizard,anole,anolis_lizard,anolis,anolis_carolinensis"), ANOPHELES_GAMBIAE(7165, "AgamP", "IGNORE", "mosquito,anopheles,agambiae,anophelesgambiae,anopheles_gambiae"), APIS_MELLIFERA(7460, "AMEL", "IGNORE", "honeybee,honey_bee,apis,amellifera,apismellifera,apis_mellifera"), ASTYANAX_MEXICANUS(7994, "AstMex", "ENSAMX", "amex,amexicanus,astmex,astyanax mexicanus,astyanax_mexicanus,cave fish"), BOS_TAURUS(9913, "UMD", "ENSBTA", "cow,btaurus,bostaurus,bos_taurus"), CAENORHABDITIS_BRIGGSAE(6238, "CBR", "IGNORE", "briggsae,cbriggsae,caenorhabditisbriggsae,caenorhabditis_briggsae"), CAENORHABDITIS_ELEGANS(6239, "WBcel", "IGNORE", "elegans,celegans,caenorhabditiselegans,caenorhabditis_elegans"), CALLITHRIX_JACCHUS(9483, "C_jacchus", "ENSCJA", "marmoset,white-tufted-ear marmoset,callithrix_jacchus,callithrix jacchus,Callithrix_jacchus,Callithrix jacchus,callithrix"), CANIS_FAMILIARIS(9615, "CanFam", "ENSCAF", "dog,doggy,cfamiliaris,canisfamiliaris,canis_familiaris"), CAVIA_PORCELLUS(10141, "CAVPOR", "ENSCPO", "guineapig,guinea_pig,cporcellus,cavia_porcellus"), CERATOTHERIUM_SIMUM_SIMUM(73337,"CerSimSim","ENSCSI","ceratotherium simum simum,ceratotherium_simum_simum,cersim,csim,csimum_simum,rhinoceros"), CHOLOEPUS_HOFFMANNI(9358, "choHof", "ENSCHO", "Sloth,Two-toed_sloth,Hoffmans_two-fingered_sloth,choloepus_hoffmanni"), CHLOROCEBUS_SABAEUS(60711,"ChlSab","ENSCSA","chlorocebus_sabaeus,chlorocebus_aethiops_sabaeus,vervet monkey,african green monkey,green monkey"), CIONA_INTESTINALIS(7719, "KH", "ENSCIN", "cionaintestinalis,ciona_int,ciona_intestinalis"), CIONA_SAVIGNYI(51511, "CSAV", "ENSCSAV", "savignyi,cionasavignyi,csavignyi,ciona_savignyi"), CRICETULUS_GRISEUS(10029, "CriGri", "ENSCGR", "hamster,chinese_hamster,cgriseus,cricetulus_griseus"), CULEX_PIPIENS(7175, "CpiJ", "CPIJ", "culex,culexpipiens,culex_pipiens"), DANIO_RERIO(7955, "GRCz", "ENSDAR", "zebrafish,danio,drerio,daniorerio,danio_rerio"), DASYPUS_NOVEMCINCTUS(9361, "DasNov", "ENSDNO", "armadillo,daisy,dasypus,nine_banded_armadillo,nine-banded_armadillo,texas_armadillo,dasypus_novemcinctus"), DIPODOMYS_ORDII(10020, "DIPORD", "ENSDOR", "ords_kangaroo_rat,ordskangaroorat,kangaroo_rat, kangaroorat , dipodomys_ordii"), DROSOPHILA_ANANASSAE(7217, "dana", "IGNORE", "drosophila,ananassae,drosophilaananassae,drosophila_ananassae,dana"), DROSOPHILA_GRIMSHAWI(7222, "dgri", "IGNORE", "drosophila,grimshawi,drosophilagrimshawi,drosophila_grimshawi,dgri"), DROSOPHILA_MELANOGASTER(7227, "BDGP", "IGNORE", "drosophila,dmelongaster,drosophilamelanogaster,drosophila_melanogaster"), DROSOPHILA_PSEUDOOBSCURA(7237, "BCM-HGSC", "IGNORE", "drosophila,pseudoobscura,drosophilapseudoobscura,drosophila_pseudoobscura,dpse"), DROSOPHILA_WILLISTONI(7260, "dwil", "IGNORE", "drosophila,willistoni,drosophilawillistonii,drosophila_willistoni,dwil"), DROSOPHILA_YAKUBA(7245, "dyak", "IGNORE", "drosophila,yakuba,drosophilayakuba,drosophila_yakuba,dyak"), ECHINOPS_TELFAIRI(9371, "TENREC", "ENSETE", "tenrec,echinops,small_madagascar_hedgehog,lesser_hedgehog_tenrec,echinops_telfairi"), EQUUS_CABALLUS(9796, "EquCab", "ENSECA", "horse,equus,mr_ed,ecaballus,equus_caballus"), ERINACEUS_EUROPAEUS(9365, "HEDGEHOG", "ENSEEU", "hedgehog,european_hedgehog,eeuropaeus,erinaceus_europaeus"), FICEDULA_ALBICOLLIS(59894, "FicAlb", "ENSFAL", "flycatcher,falbicollis,collared_flycatcher,f_albicollis,ficalb"), FELIS_CATUS(9685, "Felis_catus", "ENSFCA", "cat,fcatus,felis,domestic_cat,felis_catus"), GADUS_MORHUA(8049, "gadMor", "ENSGMO", "cod,gadus_morhua,gmorhua,atlantic_cod"), GALLUS_GALLUS(9031, "Gallus_gallus-", "ENSGAL", "chicken,chick,ggallus,gallusgallus,gallus_gallus"), GASTEROSTEUS_ACULEATUS(69293, "BROADS", "ENSGAC", "stickleback,gas_aculeatus,gasaculeatus,gasterosteusaculeatus,gasterosteus_aculeatus"), GORILLA_GORILLA(9595, "gorGor", "ENSGGO", "gorilla,gorilla_gorilla,ggor"), HETEROCEPHALUS_GLABER(10181, "HetGla", "ENSHGL", "naked_mole_rat,heterocephalus_glaber,hglaber"), HOMO_SAPIENS(9606, "GRCh", "ENS", "human,hsapiens,homosapiens,homo_sapiens"), ICTIDOMYS_TRIDECEMLINEATUS(43179, "spetri", "ENSSTO", "squirrel,stridecemlineatus,thirteen-lined_ground_squirrel,ictidomys_tridecemlineatus_arenicola,ictidomys_tridecemlineatus"), LATIMERIA_CHALUMNAE(7897,"LatCha", "ENSLAC","coelacanth,latimeria_chalumnae,latimeria,l_chalumnae,Latimeria chalumnae"), LEPISOSTEUS_OCULATUS(7918, "LepOcu","ENSLOC","spotted_gar"), LOXODONTA_AFRICANA(9785, "LoxAfr", "ENSLAF", "elephant,nelly,loxodonta,african_elephant,african_savannah_elephant,african_bush_elephant,loxodonta_africana"), MACACA_MULATTA(9544, "Mmul", "ENSMMU", "macacamulatta,rhesusmacaque,rhesus_macaque,macaque,macaca_mulatta"), NOTAMACROPUS_EUGENII(9315, "Meug", "ENSMEU", "wallaby,tammar_wallaby,natomacropuseugenii,n_eugenii,notamacropus_eugenii,Notamacropus eugenii,macropuseugenii,m_eugenii,tammarwallaby,Macropus eugenii,macropus_eugenii"), MELEAGRIS_GALLOPAVO(9103, "UMD", "ENSMGA", "turkey,common turkey,wild turkey,meleagris_gallopavo, meleagris_gallopavo"), MELOPSITTACUS_UNDULATUS(13146, "MelUnd", "ENSMUN", "budgerigar,melopsittacus_undulatus,mundulatus"), MICROCEBUS_MURINUS(30608, "Mmur", "ENSMIC", "mouse_lemur,mouselemur,microcebus,microcebus_murinus"), MONODELPHIS_DOMESTICA(13616, "BROADO", "ENSMOD", "opossum,monodelphis,mdomestica,mdomesticus,monodelphisdomestica,monodelphisdomesticus,monodelphis_domesticus,monodelphis_domestica"), MUS_MUSCULUS(10090, "GRCm", "ENSMUS", "mouse,mmusculus,musmusculus,mus_musculus"), MUS_MUSCULUS_129S1SVIMJ(10090, "129S1_SvImJ", "MGP_129S1SvImJ_", ""), MUS_MUSCULUS_AJ(10090, "A_J", "MGP_AJ_", ""), MUS_MUSCULUS_AKRJ(10090, "AKR_J", "MGP_AKRJ_", ""), MUS_MUSCULUS_BALBCJ(10090, "BALB_cJ", "MGP_BALBcJ_", ""), MUS_MUSCULUS_C3HHEJ(10090, "C3H_HeJ", "MGP_C3HHeJ_", ""), MUS_MUSCULUS_C57BL6NJ(10090, "C57BL_6NJ", "MGP_C57BL6NJ_", ""), MUS_MUSCULUS_CASTEIJ(10091, "CAST_EiJ", "MGP_CASTEiJ_", ""), MUS_MUSCULUS_CBAJ(10090, "CBA_J", "MGP_CBAJ_", ""), MUS_MUSCULUS_DBA2J(10090, "DBA_2J", "MGP_DBA2J_", ""), MUS_MUSCULUS_FVBNJ(10090, "FVB_NJ", "MGP_FVBNJ_", ""), MUS_MUSCULUS_LPJ(10090, "LP_J", "MGP_LPJ_", ""), MUS_MUSCULUS_NODSHILTJ(10090, "NOD_ShiLtJ", "MGP_NODShiLtJ_", ""), MUS_MUSCULUS_NZOHLLTJ(10090, "NZO_HlLtJ", "MGP_NZOHlLtJ_", ""), MUS_MUSCULUS_PWKPHJ(39442, "PWK_PhJ", "MGP_PWKPhJ_", ""), MUS_MUSCULUS_WSBEIJ(10092, "WSB_EiJ", "MGP_WSBEiJ_", ""), MUS_SPRETUS_SPRETEIJ(10096, "SPRET_EiJ", "MGP_SPRETEiJ_", ""), MUSTELA_PUTORIUS_FURO(9669, "MusPutFur", "ENSMPU", "ferret,domestic ferret,Mustela_putorius_furo,Mustela putorius furo"), MYOTIS_LUCIFUGUS(59463, "Myoluc", "ENSMLU", "microbat,little_brown_bat,mlucifugus,myotis,myotis_lucifugus"), NOMASCUS_LEUCOGENYS(61853, "NLEU", "ENSNLE","gibbon,nleu,nomleu,nleugogenys,nomascus_leucogenys"), OCHOTONA_PRINCEPS(9978, "OchPri", "ENSOPR", "pika,Americanpika,American_pika,oprinceps,ochotona,ochotona_princeps"), OREOCHROMIS_NILOTICUS(8128, "Orenil", "ENSONI", "tilapia,Oreochromis niloticus,oreochromis niloticus,Oreochromis_niloticus,oreochromis_niloticus,Nile tilapia,nile tilapia,Nile_tilapia,nile_tilapia,O. niloticus"), ORNITHORHYNCHUS_ANATINUS(9258, "OANA", "ENSOAN", "platypus,oanatius,ornithorhynchus_anatinus"), ORYCTEROPUS_AFER_AFER(1230840,"OryAfe","ENSOAF","aardvark,oafe,oafer_after,oryafe,orycteropus afer afer,orycteropus_afer_afer"), ORYCTOLAGUS_CUNICULUS(9986, "OryCun", "ENSOCU", "rabbit,oryctolagus,domestic_rabbit,bunny,japanese_white_rabbit,european_rabbit,oryctolagus_cuniculus"), ORYZIAS_LATIPES(8090, "MEDAKA", "ENSORL", "medaka,oryzias,japanese_medaka,japanese_rice_fish,japanese_ricefish,japanese_killifish,oryzias_latipes"), OTOLEMUR_GARNETTII(30611, "OtoGar", "ENSOGA", "bushbaby,bush_baby,galago,small_eared_galago,ogarnettii,otolemur,otolemur_garnettii"), OVIS_ARIES(9940, "Oar", "ENSOAR", "ovis_aries,oaries,oviari,sheep"), PAN_TROGLODYTES(9598, "CHIMP", "ENSPTR", "chimp,chimpanzee,ptroglodytes,pantroglodytes,pan_troglodytes"), PAPIO_ANUBIS(9555,"PapAnu", "ENSPAN", "papio_anubis"), PAPIO_HAMADRYAS(9557, "Pham", "ENSPHA", "baboon,Papio_hamadryas,papio_hamadryas,papio_hamadryas,sacred_baboon,western_baboon,red_baboon"), PELODISCUS_SINENSIS(13735, "PelSin", "ENSPSI", "Chinese_softshell_turtle,turtle,softshell_turtle,Trionyx_sinensis"), PETROMYZON_MARINUS(7757, "Pmarinus", "ENSPMA", "lamprey,sea_lamprey,pmarinus,petromyzon,petromyzon_marinus"), POECILIA_FORMOSA(48698, "PoeFor", "ENSPFO", "amazon molly,poecilia_formosa,pformosa,poefor,pfor"), PHYSETER_MACROCEPHALUS(9755,"PhyMac", "ENSPMC", "physeter_macrocephalus, sperm whale, pmac, pmacrocephalus, physeter macrocephalus, phymac"), PONGO_ABELII(9601, "PPYG", "ENSPPY", "orangutan,orang-utan,pabellii,pongo_abelii"), PROCAVIA_CAPENSIS(9813, "PROCAP", "ENSPCA", "cape_rock_hyrax,caperockhyrax,procaviacapensis,procavia_capensis"), PTEROPUS_VAMPYRUS(132908, "PTEVAM", "ENSPVA", "large_flying_fox,largeflyingfox,pteropusvampyrus,pteropus_vampyrus"), RATTUS_NORVEGICUS(10116, "Rnor", "ENSRNO", "rat,rnovegicus,rattusnorvegicus,rattus_norvegicus"), SACCHAROMYCES_CEREVISIAE(4932, "R", "IGNORE", "yeast,saccharomyces,scerevisiae,saccharomycescerevisiae,saccharomyces_cerevisiae"), SAIMIRI_BOLIVIENSIS(39432, "SaiBol", "ENSSBO", "saimiri_boliviensis,sboliviensis,squirrel_monkey,bolivian_squirrel_monkey,squirrelmonkey"), SARCOPHILUS_HARRISII(9305, "devil", "ENSSHA", "devil,Sarcophilus_harrisii,sarcophilus_harrisii,tasmanian_devil,taz"), SOREX_ARANEUS(42254, "COMMON_SHREW", "ENSSAR", "shrew,common_shrew,commonShrew,european_shrew,saraneus,sorex,sorex_araneus"), SUS_SCROFA(9823, "Sscrofa", "ENSSSC", "pig,boar,wildboar,wild_boar,susscrofa,sus_scrofa"), TAENIOPYGIA_GUTTATA(59729, "taeGut", "ENSTGU", "zebrafinch,zebra_finch,taeniopygia_guttata,taeniopygiaguttata,tguttata,poephila_guttata,taenopygia_guttata"), TAKIFUGU_RUBRIPES(31033, "FUGU", "ENSTRU", "pufferfish,fugu,frubripes,fugurubripes,fugu_rubripes,takifugu,trubripes,takifugurubripes,takifugu_rubripes"), CARLITO_SYRICHTA(1868482, "TARSYR", "ENSTSY", "philippine_tarsier,philippinetarsier,carlitosyrichta,carlito_syrichta,Carlito syrichta,tarsiussyrichta,tarsius_syrichta"), TETRAODON_NIGROVIRIDIS(99883, "TETRAODON", "IGNORE", "tetraodon,tnigroviridis,tetraodonnigroviridis,tetraodon_nigroviridis"), TUPAIA_BELANGERI(37347, "TREESHREW", "ENSTBE", "treeshrew,tbelangeri,northern_tree_shrew,common_tree_shrew,tupaia_belangeri"), TURSIOPS_TRUNCATUS(9739, "TURTRU", "ENSTTR", "bottlenosed_dolphin,dolphin,tursiopstruncatus,tursiops_truncatus"), VICUGNA_PACOS(30538, "VICPAC", "ENSVPA", "alpaca,vicugnapacos,vicugna_pacos"), XENOPUS_TROPICALIS(8364, "JGI", "ENSXET", "pipid,pipidfrog,xenopus,xtropicalis,xenopustropicalis,xenopus_tropicalis"), XIPHOPHORUS_MACULATUS(8083, "Xipmac", "ENSXMA", "xiphophorous_maculatus,platyfish,southern_platyfish"), // MASTER_SCHEMA(0, "", "", "master_schema,masterschema,schema"), HEALTHCHECK(0, "", "", ""), HELP(0, "", "", ""), NCBI_TAXONOMY(0, "", "", ""), SYSTEM(0, "", "", ""), ENSEMBL_WEBSITE(0, "", "", ""), UNKNOWN(0, "", "", ""), ANCESTRAL_SEQUENCES(0, "", "", "ancestral,ancestor"); // Taxonomy IDs - see ensembl-compara/sql/taxon.txt private static Map<Integer, Species> taxonIDToSpecies = new HashMap<Integer, Species>(); private static Map<String, Species> assemblyPrefixToSpecies = new HashMap<String, Species>(); private static Map<Species, String> vegaStableIDPrefix = new EnumMap<Species, String>(Species.class); private static Logger logger = Logger.getLogger("HealthCheckLogger"); // populate the hash tables static { for (Species s : values()) { taxonIDToSpecies.put(s.getTaxonID(), s); assemblyPrefixToSpecies.put(s.getAssemblyPrefix(), s); // we have to add to the Vega hash the 4 species with Vega annotation switch (s) { case HOMO_SAPIENS: vegaStableIDPrefix.put(Species.HOMO_SAPIENS, "OTTHUM"); break; case MUS_MUSCULUS: vegaStableIDPrefix.put(Species.MUS_MUSCULUS, "OTTMUS"); break; case CANIS_FAMILIARIS: vegaStableIDPrefix.put(Species.CANIS_FAMILIARIS, "OTTCAN"); break; case DANIO_RERIO: vegaStableIDPrefix.put(Species.DANIO_RERIO, "OTTDAR"); break; case RATTUS_NORVEGICUS: vegaStableIDPrefix.put(Species.RATTUS_NORVEGICUS, "OTTRNO"); break; case SUS_SCROFA: vegaStableIDPrefix.put(Species.SUS_SCROFA, "OTTSUS"); break; } } } private final int taxonID; private final String assemblyPrefix; private final String stableIDPrefix; private final String alias; private final Set<String> aliasSet; private Species(int tax_id, String assembly, String stableID, String alias) { this.taxonID = tax_id; this.assemblyPrefix = assembly; this.stableIDPrefix = stableID; this.alias = alias; //Build a hash set of lowercased aliases rather than using splits of aliases everytime Set<String> aliasSet = new HashSet<String>(); for(String a: alias.split(",")) { aliasSet.add(a.toLowerCase().trim()); } aliasSet.add(this.name().toLowerCase()); this.aliasSet = aliasSet; } // getters for the properties public int getTaxonID() { return taxonID; }; public String getAssemblyPrefix() { return assemblyPrefix; }; public String getStableIDPrefix() { return stableIDPrefix; }; public String getAlias() { return alias; }; // methods to mantain backwards compatibility // ----------------------------------------------------------------- /** * Resolve an alias to a Species object. * * @param speciesAlias * The alias (e.g. human, homosapiens, hsapiens) * @return The species object corresponding to alias, or Species.UNKNOWN if it cannot be resolved. */ public static Species resolveAlias(String speciesAlias) { String alias = speciesAlias.toLowerCase(); // -------------------------------------- for (Species s : values()) { if(s.aliasSet.contains(alias)) { return s; } } return Species.UNKNOWN; } // ----------------------------------------------------------------- /** * Get the taxonomy ID associated with a particular species. * * @param s * The species to look up. * @return The taxonomy ID associated with s, or "" if none is found. */ public static String getTaxonomyID(Species s) { String result = ""; result = Integer.toString(s.getTaxonID()); return result; } // ------------------------------------------------------------------------- /** * Return a Species object corresponding to a particular assembly prefix. * * @param prefix * The assembly prefix. * * @return The Species corresponding to prefix, or Species.UNKNOWN. */ public static Species getSpeciesForAssemblyPrefix(String prefix) { Species result = Species.UNKNOWN; if (assemblyPrefixToSpecies.containsKey(prefix)) { result = (Species) assemblyPrefixToSpecies.get(prefix); } else { result = Species.UNKNOWN; } return result; } // ------------------------------------------------------------------------- /** * Get the assembly prefix for a species. * * @param s * The species. * @return The assembly prefix for s. */ public static String getAssemblyPrefixForSpecies(Species s) { return (String) s.getAssemblyPrefix(); } // ------------------------------------------------------------------------- /** * Get the stable ID prefix for a species. * * @param s * The species. * @param t * The type of database. * @return The stable ID prefix for s. Note "IGNORE" is used for imported species. */ public static String getStableIDPrefixForSpecies(Species s, DatabaseType t) { String result = ""; if (t.equals(DatabaseType.CORE)) { result = (String) s.getStableIDPrefix(); } else if (t.equals(DatabaseType.VEGA)) { result = (String) vegaStableIDPrefix.get(s); } if (result == null || result.equals("")) { logger.warning("Can't get stable ID prefix for " + s.toString() + " " + t.toString() + " database"); } return result; } // ------------------------------------------------------------------------- /** * Get the BioMart table root for a species (e.g. hsapiens, mmusculus) */ public String getBioMartRoot() { String[] bits = this.name().toLowerCase().split("_"); return bits.length > 1 ? bits[0].substring(0, 1) + bits[1] : ""; } public String toString() { return this.name().toLowerCase(); } // ----------------------------------------------------------------- }