/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright (C) 2003 EBI, GRL * * This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.util.HashMap; import java.util.Map.Entry; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Check for any xrefs that are listed as "KNOWN" in the external_db table but * are actually predictions. Currently only RefSeq XP/XM xrefs. */ public class PredictedXrefs extends SingleDatabaseTestCase { /** * Create a new PredictedXrefs testcase. */ public PredictedXrefs() { setDescription("Check for predicted xrefs erroneously classed as KNOWN."); setTeamResponsible(Team.CORE); setSecondTeamResponsible(Team.GENEBUILD); } /** * This only applies to core databases. */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.VEGA); removeAppliesToType(DatabaseType.SANGER_VEGA); removeAppliesToType(DatabaseType.RNASEQ); removeAppliesToType(DatabaseType.CDNA); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; // hash of external_db_name / xref accession regexps to look for. // Note patterns are MySQL patterns, not regexps // if any of these match it's an error HashMap<String,String> nameToAccessionPattern = new HashMap<String,String>(); nameToAccessionPattern.put("RefSeq_mRNA", "XM%"); nameToAccessionPattern.put("RefSeq_ncRNA", "XR%"); nameToAccessionPattern.put("RefSeq_peptide", "XP%"); Connection con = dbre.getConnection(); for(Entry<String,String> entry: nameToAccessionPattern.entrySet()) { String externalDBName = entry.getKey(); String pattern = entry.getValue(); String sql = "SELECT COUNT(*) FROM xref x, external_db e " + "WHERE x.external_db_id = e.external_db_id " + "AND e.db_name = '" + externalDBName + "' " + "AND x.dbprimary_acc LIKE '" + pattern + "'"; logger.fine("Checking for " + externalDBName + " xrefs matching " + pattern); int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem(this, con, rows + " " + externalDBName + " xrefs seem to be predictions (match " + pattern + ")\n" + "USEFUL SQL:" + sql); result = false; } else { ReportManager.correct(this, con, "No " + externalDBName + " xrefs match " + pattern); } } return result; } // run // ---------------------------------------------------------------------- } // PredictedXrefs