/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.funcgen; import java.io.File; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.io.IOException; import org.apache.commons.lang.StringUtils; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.Priority; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; public class CheckResultSetDBFileLink extends SingleDatabaseTestCase { String[] windows = {"30","65","130","260","450","648","950","1296"}; protected String[] windowSizes() { return windows; } public CheckResultSetDBFileLink() { addToGroup("post_regulatorybuild"); addToGroup("funcgen");//do we need this group and the funcgen-release group? addToGroup("funcgen-release"); setTeamResponsible(Team.FUNCGEN); setDescription("Checks if the binary signal (col) files exist for relevant ResultSets\n" + "Also checks dbfile_data_root subdirs to see if there are still DISPLAYABLE or if they support a regualtory build\n"); setPriority(Priority.AMBER); setEffect("Signal tracks will not display in the browser.\n" + "NOTE: RegulatorySets does something similar, but from the DataSet perspective\n " + "\tHence, consider those HC results first, before fixing these!"); setFix("Re-create files or check file names manually."); } private String getSupportedRegulatoryFeatureSet(Connection con, String subdirName){ String regFsetSQL = "SELECT fs.name from result_set rs, supporting_set ss, data_set ds, " + "supporting_set ss1, data_set ds1, feature_set fs WHERE " + "rs.result_set_id=ss.supporting_set_id and ss.type='result' and ss.data_set_id=ds.data_set_id " + "AND ds.feature_set_id=ss1.supporting_set_id and ss1.type='feature' and " + "ss1.data_set_id=ds1.data_set_id and ds1.feature_set_id=fs.feature_set_id and " + "fs.type='regulatory' and fs.name not rlike '.*_v[0-9]+$' and rs.name='" + subdirName + "'"; String regFset = null; try{ Statement stmt = con.createStatement(); ResultSet supportedRegFset = stmt.executeQuery(regFsetSQL); if((supportedRegFset != null) && supportedRegFset.next()){ regFset = supportedRegFset.getString(1); //doesn't matter if we get duplicate entries here based on //redundant rset names. catch reundant set before here } } catch(SQLException e){ e.printStackTrace(); } return regFset; } public boolean run(DatabaseRegistryEntry dbre) { boolean result=true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); int MAX_REPORT=50; //Only out 50 problems by default HashMap<String, String> rSetDBLinks = new HashMap<String, String>(); HashMap<String, String> rSetFClasses = new HashMap<String, String>(); HashMap<String, String> rSetStates = new HashMap<String, String>(); HashMap<String, String> rSetRFSets = new HashMap<String, String>(); ArrayList<String> removeableRsets = new ArrayList<String>(); String rsetInfoSQL = "SELECT rs.name, dbf.path, s1.name, rs.feature_class from result_set rs left join dbfile_registry dbf "+ "ON rs.result_set_id=dbf.table_id and dbf.table_name='result_set' left join " + "(select s.table_id, sn.name from status s, status_name sn where " + "s.status_name_id=sn.status_name_id and s.table_name='result_set' and sn.name='DISPLAYABLE') s1 " + "ON rs.result_set_id=s1.table_id"; ResultSet rsetInfo = stmt.executeQuery(rsetInfoSQL); String rsetStatus, rsetPath, rsetName, regFset, rsetFClass; //String infoString = ""; while ((rsetInfo != null) && rsetInfo.next()) { rsetName = rsetInfo.getString(1); rsetPath = rsetInfo.getString(2); rsetStatus = rsetInfo.getString(3); rsetFClass = rsetInfo.getString(4); regFset = this.getSupportedRegulatoryFeatureSet(con, rsetName); //TEST IF WE HAVE SEEN A REDUNDANTLY NAMED RESULT_SET if(rSetDBLinks.containsKey(rsetName)){ //bail out here or continue? //or could mark for deletion as we could have >2 ReportManager.problem(this, con, "Found redundant result_set naming:\t" + rsetName + "\nEither rectify in DB or updated HC to account for result_set unique key"); return false; //bail out as results maybe unsafe } if( (rsetPath != null) || (rsetStatus != null) || (regFset != null) ){ rSetFClasses.put(rsetName, rsetFClass); rSetDBLinks.put(rsetName, rsetPath); rSetStates.put(rsetName, rsetStatus); rSetRFSets.put(rsetName, regFset); } else{ removeableRsets.add(rsetName); } } if(removeableRsets.size() > 0){ //Should this be info instead? ReportManager.problem(this, con, "Found " + removeableRsets.size() + " 'removeable' result_sets i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path:\n\t" + StringUtils.join(removeableRsets, "\n\t") + "\n"); result = false; } int numRsets = rSetDBLinks.size(); //Get Base Folder ResultSet rsetDBDataRoot = stmt.executeQuery("SELECT meta_value from meta where meta_key='dbfile.data_root'"); String problemString; //For easier interpretation/reporting, build 1 problem string per result_set/subDir, if((rsetDBDataRoot != null) && rsetDBDataRoot.next()){ String root_dir = rsetDBDataRoot.getString(1); //rsetDBDataRoot.close(); // don't need this anymore as reusing stmt will close this ReportManager.problem(this, con, "Found dbfile.data_root meta key. Need to remove this once all the other failures have been resolved"); result = false; //This can be removed once we resolve the dbfile.data_root issue. /nfs/ensnfs-dev/staging/nNeed adding to config //and add species and assembly //Get distinct result_set feature_class values String fclassSQL = "SELECT distinct(feature_class) from result_set"; ResultSet rsetFclasses = stmt.executeQuery(fclassSQL); //rsetFclasses and subdirRsetIDs ResultSets interleved here from same stmt! //This may cause issues, but seemingly not the issue we are experiencing //A ResultSet object is automatically closed when the Statement object that generated it is closed, re-executed, //or used to retrieve the next result from a sequence of multiple results. << counts or ResultSets, this is very rare and can probably be ignored //This is only applicable to using execute, then a separate get method on Statment e.g. getResultSet while ((rsetFclasses != null) && rsetFclasses.next()) { String featureClass = rsetFclasses.getString(1); //TEST EXISTING DIRECTORIES ARE RESULT SETS String resultSetPath = root_dir + "/" + featureClass + "_feature"; File resultSetPathF = new File(resultSetPath); if(resultSetPathF.exists() && resultSetPathF.isDirectory()){ String[] subDirs = resultSetPathF.list(); String rsetSQL; ArrayList<String> subdirProblems = new ArrayList<String>(); Statement stmt1 = con.createStatement(); boolean seenREADME = false; for(String subDir : subDirs){ problemString = ""; //Check is not a soft link //as these are to support archives, and really need testing in another HC boolean isLink = true; String fullPath = resultSetPath + "/" + subDir; try { isLink = isSymLink(fullPath); } catch(IOException i){ i.printStackTrace(); } if(subDir.equals("README") ){ seenREADME = true; continue; } else if(isLink){ continue; } rsetSQL = "SELECT result_set_id from result_set where name='" + subDir + "'"; ResultSet subdirRsetIDs = stmt1.executeQuery(rsetSQL); if((root_dir != null) && subdirRsetIDs.next()){ //String rsetID = subdirRsetIDs.getString(1); //logger.fine("Found result_feature subdir:\t" + subDirs[i] + " with rset id\t" + rsetID); if(subdirRsetIDs.next()){ problemString += "\tCannot find unique result_set. Check manually or update HC\n"; } //CATCH SUBDIRS WHICH FOR RESULT_SETS WITHOUT DBFILE_REGISTRY/DISPLAYABLE ENTRY OR IN BUILD if(removeableRsets.contains(subDir)){ problemString += "\tAppears to be 'removeable' i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path.\n"; } } else{ problemString += "\tCannot find result_set.\n"; } if(! problemString.equals("")){ subdirProblems.add(subDir + " " + featureClass + "_feature subdir has problems:\n" + problemString); } } if(seenREADME == false){ ReportManager.problem(this, con, "No README file present in:\t" + resultSetPath); result = false; } int numProbs = subdirProblems.size(); if(numProbs != 0){ ReportManager.problem(this, con, "Found " + numProbs + " " + featureClass + "_feature subdirs with problems (use -output info for all)."); result = false; for(int i=0; i<numProbs; i++){ if(i >= MAX_REPORT){ //Both these seem to report even with when restricting to -output problem? ReportManager.info(this, con, subdirProblems.get(i).toString()); } else{ ReportManager.problem(this, con, subdirProblems.get(i).toString()); } } } else{ ReportManager.info(this, con, "Found 0 " + featureClass + "_feature subdirs with problems."); } } else{ ReportManager.problem(this, con, "Cannot test if result_set dirs are valid as path does not exist or is not a directory:\t" + resultSetPath); result = false; //Don't return here as rsetPaths in DB may now be pointing to as different path } } if(numRsets == 0){ ReportManager.problem(this, con, "dbfile_root is defined in the meta table but found no result_sets can be found"); result = false; //Could return here? } else{ // NOW CHECK EXISTING RESULT SETS File root_dir_f = new File(root_dir); if(root_dir_f.exists()){ ArrayList<String> rsetProblems = new ArrayList<String>(); Iterator<String> dbLinkIt = rSetDBLinks.keySet().iterator(); //Here we are iterating over all the rSetDBLinks twice //once for each FeatureClass //but we get the rsetFClass below Object tmpObject; while(dbLinkIt.hasNext()){ rsetName = dbLinkIt.next().toString(); //Need to bring in the class here too problemString = ""; //toString on null was failing silently here! rsetPath = ( (tmpObject = rSetDBLinks.get(rsetName)) == null) ? "NO DBFILE_REGISTRY PATH" : tmpObject.toString(); rsetStatus = ( (tmpObject = rSetStates.get(rsetName)) == null) ? "NOT DISPLAYABLE" : tmpObject.toString(); regFset = ( (tmpObject = rSetRFSets.get(rsetName)) == null) ? "NOT IN BUILD" : tmpObject.toString(); rsetFClass = rSetFClasses.get(rsetName); //Will always be defined //Report all these together for easier interpretation if( ( rsetPath.equals("NO DBFILE_REGISTRY PATH") || rsetStatus.equals("NOT DISPLAYABLE") || regFset.equals("NOT IN BUILD") ) && rsetFClass.equals("result") ){ problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS " + rsetStatus + "\n\t" + "Supports:\t" + regFset + "\n"; } else if( (rsetPath.equals("NO DBFILE_REGISTRY PATH") || rsetStatus.equals("NOT DISPLAYABLE") ) && rsetFClass.equals("dna_methylation") ){ problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS " + rsetStatus + "\n"; } if(! rsetPath.equals("NO DBFILE_REGISTRY PATH")){// NOW TEST COL FILES String rSetFinalPath = root_dir + rsetPath; File rsetFileFolder = new File(rSetFinalPath); if(rsetFileFolder.exists()){ if(rsetFClass.equals("result") ){ //String[] windows = {"30","65","130","260","450","648","950","1296"}; //for(int i=0;i<windows.length;i++){ String[] windowSizes= windowSizes(); for (String wSize : windowSizes) { String rsetWindowFileName = rSetFinalPath + "/result_features." + rsetName + "." + wSize + ".col"; File rsetWindowFile = new File(rsetWindowFileName); if(rsetWindowFile.exists()){ if(rsetWindowFile.length() == 0){ problemString += "\tEmpty file:\t" + rsetWindowFileName + "\n"; } } else { problemString += "\tFile does not exist:\t" + rsetWindowFileName + "\n"; } } } } else { problemString += "\tdbfile_registry.path does not exist:\t" + rSetFinalPath + "\n"; } } if(! problemString.equals("")){ rsetProblems.add(rsetName + " ResultSet has problems:\n" + problemString); } } int numProbs = rsetProblems.size(); if(numProbs != 0){ ReportManager.problem(this, con, "Found " + numProbs + " ResultSets with problems.\n"); result = false; for(int i=0; i<numProbs; i++){ if(i >= MAX_REPORT){ //Both these seem to report even with when restricting to -output problem? ReportManager.info(this, con, rsetProblems.get(i).toString()); } else{ ReportManager.problem(this, con, rsetProblems.get(i).toString()); } } } else{ ReportManager.info(this, con, "Found 0 ResultSets with problems."); } } else { ReportManager.problem(this, con, "Found " + numRsets + " result_sets but " + "dbfile.data_root does not seem to be valid:\t" + root_dir); result = false; //could return here? } } // END OF EXISTING RESULT SET CHECK } else { //no rsetDBDataRoot if(numRsets == 0){ //could sanity check we don't have a build here? ReportManager.info(this, con, "Found no result_sets or dbfile.data_root"); } else{ ReportManager.problem(this, con, "Found " + numRsets + "result_sets but no dbfile.data_root meta key. Please add a dbfile.data_root meta key to perform this HC"); result = false; //could return here? } } } catch (SQLException e){ e.printStackTrace(); } return result; } // Need to push these to some core File utils class public static boolean isSymLink(File file) throws IOException { if (file == null) throw new NullPointerException("File argument cannot be null"); File cfile; if (file.getParent() == null) { cfile = file; } else { File canonDir = file.getParentFile().getCanonicalFile(); cfile = new File(canonDir, file.getName()); } return ! cfile.getCanonicalFile().equals( cfile.getAbsoluteFile() ); } public static boolean isSymLink(String path) throws IOException { if (path == null) throw new NullPointerException("Path argument cannot be null"); File pathFile = new File(path); return isSymLink(pathFile); } }