/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ensembl.healthcheck.testcase.variation;

import java.sql.Connection;
import java.util.List;

import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.RowMapper;
import org.ensembl.healthcheck.util.SqlTemplate;

/**
 * Healthcheck that inspects every non-null {@code phenotype.description} and
 * reports values that are suspiciously short, contain a newline, match a known
 * placeholder term, or contain characters outside the supported set.
 */
public class CheckChar extends SingleDatabaseTestCase {

	/**
	 * Placeholder descriptions that carry no phenotype information; compared
	 * case-insensitively against the whole description.
	 */
	private static final String[] NON_INFORMATIVE_TERMS = {
		"None",
		"Not provided",
		"not specified",
		"Not in OMIM",
		"Variant of unknown significance",
		"not_provided",
		"?",
		"."
	};

	/**
	 * Registers the test in the "variation-release" group and sets its
	 * description and responsible team.
	 */
	public CheckChar() {

		addToGroup("variation-release");
		setDescription("Check that imported names/descriptions contains only supported characters");
		setTeamResponsible(Team.VARIATION);
	}

	/**
	 * Runs the checks against the phenotype table (the only table examined).
	 *
	 * @param dbre
	 *          The database to check.
	 * @return True if no problem was reported and no exception was raised.
	 */
	public boolean run(final DatabaseRegistryEntry dbre) {

		final Connection con = dbre.getConnection();
		boolean passed = true;

		try {
			// Pull every populated description; each one goes through all four checks.
			final List<String> descriptions = getSqlTemplate(con).queryForDefaultObjectList(
					"select description from phenotype where description is not null", String.class);

			for (final String description : descriptions) {
				if (!reportDescriptionProblems(con, description)) {
					passed = false;
				}
			}
		} catch (Exception e) {
			ReportManager.problem(this, con, "HealthCheck generated an exception: " + e.getMessage());
			passed = false;
		}

		if (passed) {
			// Nothing was flagged: emit an explicit "correct" entry so the interface picks up the HC.
			ReportManager.correct(this, con, "CheckChar healthcheck passed without any problem");
		}
		return passed;
	}

	/**
	 * Applies all checks to one description, reporting each failure separately.
	 * The checks are independent, so one description may produce several reports.
	 *
	 * @param con
	 *          Connection the problems are reported against.
	 * @param description
	 *          The phenotype description to inspect.
	 * @return True if none of the checks reported a problem.
	 */
	private boolean reportDescriptionProblems(final Connection con, final String description) {

		boolean clean = true;

		// Fewer than four characters is treated as suspiciously short.
		if (description.length() < 4) {
			ReportManager.problem(this, con, "phenotype: " + description + " is suspiciously short");
			clean = false;
		}

		// Embedded line feeds would be interpreted as new lines.
		if (description.contains("\n")) {
			ReportManager.problem(this, con, "phenotype: " + description + " contains a newline ");
			clean = false;
		}

		// Placeholder text suggesting that no phenotype was actually supplied.
		if (!checkNonTerms(description)) {
			ReportManager.problem(this, con, "phenotype: " + description + " is not useful");
			clean = false;
		}

		// Unsupported individual characters, or an unexpected first character.
		if (!checkUnsupportedChar(description)) {
			ReportManager.problem(this, con, "phenotype: \""+ description +"\" has suspect start or unsupported characters");
			clean = false;
		}

		return clean;
	}

	// --------------------------------------------------------------

	/**
	 * Tests whether a string consists solely of supported characters.
	 * Supported characters are those with codes 32 (space) to 126 (tilde),
	 * excluding '&lt;' and '&gt;'. In addition, the first character must be an
	 * ASCII digit or letter.
	 *
	 * @param input
	 *          The string to test.
	 * @return True if every character is supported and the first character is
	 *         an ASCII letter or digit; an empty string yields true.
	 */
	public boolean checkUnsupportedChar(String input) {

		for (int pos = 0; pos < input.length(); pos++) {

			final char c = input.charAt(pos);

			// Outside ' ' (32) .. '~' (126), or one of the angle brackets (60, 62).
			if (c < ' ' || c > '~' || c == '<' || c == '>') {
				return false;
			}

			// The leading character must additionally be 0-9, A-Z or a-z.
			if (pos == 0) {
				final boolean digit = c >= '0' && c <= '9';
				final boolean upper = c >= 'A' && c <= 'Z';
				final boolean lower = c >= 'a' && c <= 'z';
				if (!(digit || upper || lower)) {
					return false;
				}
			}
		}
		return true;
	}

	/**
	 * Tests whether a description is something other than a known placeholder.
	 *
	 * @param input
	 *          The description to test.
	 * @return False if the whole string equals, ignoring case, one of the
	 *         placeholder terms; true otherwise.
	 */
	public boolean checkNonTerms(String input) {

		for (final String placeholder : NON_INFORMATIVE_TERMS) {
			if (input.equalsIgnoreCase(placeholder)) {
				return false;
			}
		}
		return true;
	}

	// --------------------------------------

}