/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2004 EBI, GRL
*
* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* An EnsEMBL Healthcheck test case that looks for broken foreign-key relationships.
*/
public class CoreForeignKeys extends SingleDatabaseTestCase {
/**
* Create an OrphanTestCase that applies to a specific set of databases.
*/
public CoreForeignKeys() {
setDescription("Check for broken foreign-key relationships.");
setTeamResponsible(Team.CORE);
setSecondTeamResponsible(Team.GENEBUILD);
}
/**
* Look for broken foreign key relationships.
*
* @param dbre
* The database to use.
* @return true if all foreign key relationships are valid.
*/
public boolean run(DatabaseRegistryEntry dbre) {
boolean result = true;
Connection con = dbre.getConnection();
// ----------------------------
result &= checkForOrphans(con, "exon", "exon_id", "exon_transcript", "exon_id", false);
result &= checkForOrphans(con, "transcript", "transcript_id", "exon_transcript", "transcript_id", false);
result &= checkForOrphans(con, "gene", "gene_id", "transcript", "gene_id", false);
result &= checkForOrphans(con, "object_xref", "xref_id", "xref", "xref_id", true);
result &= checkForOrphans(con, "xref", "external_db_id", "external_db", "external_db_id", true);
result &= checkForOrphans(con, "dna", "seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "seq_region", "coord_system_id", "coord_system", "coord_system_id", true);
result &= checkForOrphans(con, "assembly", "cmp_seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "marker_feature", "marker_id", "marker", "marker_id", true);
result &= checkForOrphans(con, "seq_region_attrib", "seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "seq_region_attrib", "attrib_type_id", "attrib_type", "attrib_type_id", true);
result &= checkForOrphans(con, "misc_feature_misc_set", "misc_feature_id", "misc_feature", "misc_feature_id", true);
result &= checkForOrphans(con, "misc_feature_misc_set", "misc_set_id", "misc_set", "misc_set_id", true);
// for a sangervega db, ignore misc_featres whcih have no annotation
if (dbre.getType() == DatabaseType.SANGER_VEGA) {
result &= checkForOrphansWithConstraint(con, "misc_feature", "misc_feature_id", "misc_attrib", "misc_feature_id",
"misc_feature_id NOT IN (select mfms.misc_feature_id from misc_feature_misc_set as mfms join misc_set as ms on mfms.misc_set_id=ms.misc_set_id and ms.code='noAnnotation')");
} else {
result &= checkForOrphans(con, "misc_feature", "misc_feature_id", "misc_attrib", "misc_feature_id", true);
}
result &= checkForOrphans(con, "misc_attrib", "attrib_type_id", "attrib_type", "attrib_type_id", true);
result &= checkForOrphans(con, "assembly_exception", "seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "assembly_exception", "exc_seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "protein_feature", "translation_id", "translation", "translation_id", true);
result &= checkForOrphans(con, "marker_synonym", "marker_id", "marker", "marker_id", true);
result &= checkForOrphans(con, "translation_attrib", "translation_id", "translation", "translation_id", true);
result &= checkForOrphans(con, "transcript_attrib", "transcript_id", "transcript", "transcript_id", true);
/*
* // now redundant (done for all tables with analysis_id) result &= checkForOrphans(con, "analysis_id", "analysis",
* "analysis_id", true); result &= checkForOrphans(con, "transcript", "analysis_id", "analysis", "analysis_id", true);
*/
result &= checkForOrphans(con, "external_synonym", "xref_id", "xref", "xref_id", true);
result &= checkForOrphans(con, "identity_xref", "object_xref_id", "object_xref", "object_xref_id", true);
result &= checkForOrphans(con, "supporting_feature", "exon_id", "exon", "exon_id", true);
result &= checkForOrphans(con, "translation", "transcript_id", "transcript", "transcript_id", true);
result &= checkForOrphans(con, "ontology_xref", "object_xref_id", "object_xref", "object_xref_id", true);
result &= checkForOrphans(con, "associated_xref", "object_xref_id", "object_xref", "object_xref_id", true);
result &= checkForOrphans(con, "associated_xref", "xref_id", "xref", "xref_id", true);
result &= checkForOrphans(con, "associated_xref", "source_xref_id", "xref", "xref_id", true);
result &= checkForOrphans(con, "dependent_xref", "object_xref_id", "object_xref", "object_xref_id", true);
result &= checkForOrphans(con, "dependent_xref", "master_xref_id", "xref", "xref_id", true);
result &= checkForOrphans(con, "dependent_xref", "dependent_xref_id", "xref", "xref_id", true);
// stable ID archive
result &= checkForOrphansWithConstraint(con, "gene_archive", "peptide_archive_id", "peptide_archive", "peptide_archive_id", "peptide_archive_id != 0");
result &= checkForOrphans(con, "peptide_archive", "peptide_archive_id", "gene_archive", "peptide_archive_id", true);
result &= checkForOrphans(con, "stable_id_event", "mapping_session_id", "mapping_session", "mapping_session_id", false);
result &= checkForOrphans(con, "gene_archive", "mapping_session_id", "mapping_session", "mapping_session_id", true);
// ----------------------------
// Check object xrefs point to existing objects
String[] types = { "Gene", "Transcript", "Translation" };
for (int i = 0; i < types.length; i++) {
result &= checkKeysByEnsemblObjectType(con, "object_xref", types[i]);
}
// ----------------------------
// Ensure that feature tables reference existing seq_regions
String[] featTabs = getCoreFeatureTables();
for (int i = 0; i < featTabs.length; i++) {
String featTab = featTabs[i];
// skip large tables as this test takes an inordinately long time
// if (featTab.equals("protein_align_feature") || featTab.equals("dna_align_feature") || featTab.equals("repeat_feature")) {
// continue;
// }
result &= checkForOrphans(con, featTab, "seq_region_id", "seq_region", "seq_region_id", true);
}
result &= checkForOrphans(con, "analysis_description", "analysis_id", "analysis", "analysis_id", true);
result &= checkForOrphans(con, "gene_attrib", "gene_id", "gene", "gene_id", true);
result &= checkForOrphans(con, "gene_attrib", "attrib_type_id", "attrib_type", "attrib_type_id", true);
result &= checkForOrphans(con, "transcript_attrib", "attrib_type_id", "attrib_type", "attrib_type_id", true);
result &= checkForOrphans(con, "translation_attrib", "attrib_type_id", "attrib_type", "attrib_type_id", true);
result &= checkForOrphans(con, "translation", "end_exon_id", "exon", "exon_id", true);
result &= checkForOrphans(con, "translation", "start_exon_id", "exon", "exon_id", true);
result &= checkForOrphans(con, "alt_allele", "gene_id", "gene", "gene_id", true);
result &= checkForOrphans(con, "marker_map_location", "map_id", "map", "map_id", true);
result &= checkForOrphans(con, "marker_map_location", "marker_id", "marker", "marker_id", true);
result &= checkForOrphans(con, "marker_map_location", "marker_synonym_id", "marker_synonym", "marker_synonym_id", true);
// 73 -> 74 core schema patch: qtl* removed
// result &= checkForOrphans(con, "qtl_feature", "qtl_id", "qtl", "qtl_id", true);
// result &= checkForOrphans(con, "qtl_synonym", "qtl_id", "qtl", "qtl_id", true);
result &= checkForOrphans(con, "assembly", "asm_seq_region_id", "seq_region", "seq_region_id", true);
result &= checkForOrphans(con, "unmapped_object", "unmapped_reason_id", "unmapped_reason", "unmapped_reason_id", true);
result &= checkForOrphans(con, "unmapped_object", "analysis_id", "analysis", "analysis_id", true);
result &= checkForOrphansWithConstraint(con, "supporting_feature", "feature_id", "dna_align_feature", "dna_align_feature_id", "feature_type = 'dna_align_feature'");
result &= checkForOrphansWithConstraint(con, "supporting_feature", "feature_id", "protein_align_feature", "protein_align_feature_id", "feature_type = 'protein_align_feature'");
result &= checkForOrphansWithConstraint(con, "transcript_supporting_feature", "feature_id", "dna_align_feature", "dna_align_feature_id", "feature_type = 'dna_align_feature'");
result &= checkForOrphansWithConstraint(con, "transcript_supporting_feature", "feature_id", "protein_align_feature", "protein_align_feature_id", "feature_type = 'protein_align_feature'");
result &= checkForOrphans(con, "density_feature", "density_type_id", "density_type", "density_type_id");
result &= checkForOrphans(con, "prediction_exon", "prediction_transcript_id", "prediction_transcript", "prediction_transcript_id");
// result &= checkForOrphans(con, "prediction_exon", "prediction_exon_id", "exon", "exon_id");
result &= checkForOrphans(con, "marker", "display_marker_synonym_id", "marker_synonym", "marker_synonym_id");
// optional relations
// 73 -> 74 core schema patch: qtl* removed
// result &= checkOptionalRelation(con, "qtl", "flank_marker_id_1", "marker", "marker_id");
// result &= checkOptionalRelation(con, "qtl", "flank_marker_id_2", "marker", "marker_id");
// result &= checkOptionalRelation(con, "qtl", "peak_marker_id", "marker", "marker_id");
result &= checkOptionalRelation(con, "unmapped_object", "external_db_id", "external_db", "external_db_id");
/*
* don't test
*
* // too slow result &= checkForOrphans(con, "repeat_feature", "repeat_consensus_id", "repeat_consensus",
* "repeat_consensus_id");
*/
// ----------------------------
// Check tables which reference the analysis table
String[] analysisTabs = getCoreTablesWithAnalysisID();
for(String analysisTab: analysisTabs) {
// skip large tables as this test takes an inordinately long time
if (analysisTab.equals("protein_align_feature") || analysisTab.equals("dna_align_feature") || analysisTab.equals("repeat_feature")) {
continue;
}
String constraint = "analysis_id IS NOT NULL";
if("object_xref".equals(analysisTab)) {
constraint += " and "+analysisTab+".analysis_id <> 0";
}
if (countOrphansWithConstraint(con, analysisTab, "analysis_id", "analysis", "analysis_id", constraint) > 0) {
ReportManager.problem(this, con, "FAILED " + analysisTab + " -> analysis using FK analysis_id relationships");
result = false;
}
}
// end new tests
// added by dr2: check that the foreign key display_marker_synonym_id points to a synonym
// for the marker
result &= checkDisplayMarkerSynonymID(con);
return result;
}
// -------------------------------------------------------------------------
public boolean checkKeysByEnsemblObjectType(Connection con, String baseTable, String type) {
// Need to handle under scores in tables here
// e.g. ProbeFeature > probe_feature
String table = type.replaceAll("([a-z])([A-Z])", "$1_$2");
table = table.toLowerCase();
// Where is ensembl_object_id used?
String column = baseTable.equals("object_xref") ? "ensembl_id" : "ensembl_object_id";
return checkForOrphansWithConstraint(con, baseTable, column, table, table + "_id", "ensembl_object_type=\'" + type + "\'");
/**
* Is this not just checkForOrphansWithConstraint?
*
*
* //int rows = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM " + baseTable + " x LEFT JOIN " + table + " ON x." + column + "=" + table
* + "." + table + // "_id WHERE x.ensembl_object_type=\'" + type + "\' AND " + table + "." + table + "_id IS NULL");
*
*
* if (rows > 0) {
*
* ReportManager.problem(this, con, rows + " rows in " + baseTable + " refer to non-existent " + table + "s"); return false;
*
* } else {
*
* ReportManager.correct(this, con, "All rows in " + baseTable + " refer to valid " + table + "s"); return true; }
**/
} // checkKeysByEnsemblObjectType
private boolean checkDisplayMarkerSynonymID(Connection con) {
boolean result = true;
// the foreign key has been checked before, but might not point to a marker_synonym
// of this markers=
int rows = DBUtils.getRowCount(con, "select count(*) from marker m where m.display_marker_synonym_id not in " + "(select ms.marker_synonym_id from marker_synonym ms where m.marker_id = ms.marker_id)");
if (rows > 0) {
// problem, there are markers that have display_marker_synonym_id that is not part of the
// synonyms for the marker
String useful_sql = "select m.marker_id, m.display_marker_synonym_id, ms1.marker_synonym_id, ms1.name from marker m, marker_synonym ms1 where m.marker_id = ms1.marker_id and m.display_marker_synonym_id not in (select ms.marker_synonym_id from marker_synonym ms where m.marker_id = ms.marker_id)";
ReportManager.problem(this, con, rows + " rows in marker table have a display_marker_synonym that is not part of the synonyms for this marker" + " Try '" + useful_sql
+ "' to find out the offending markers");
result = false;
}
return result;
} // checkDisplayMarkerSynonymID
} // CoreForeignKeys