/** * Copyright Copyright 2010-14 Simon Andrews * * This file is part of BamQC. * * BamQC is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * BamQC is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with BamQC; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * Changelog: * - Piero Dalle Pezze: Class creation. */ package uk.ac.babraham.BamQC.Utilities; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.apache.log4j.Logger; import uk.ac.babraham.BamQC.Preferences.BamQCPreferences; import uk.ac.babraham.BamQC.Network.DownloadableGenomes.DownloadableGenomeSet; import uk.ac.babraham.BamQC.Network.DownloadableGenomes.GenomeAssembly; import uk.ac.babraham.BamQC.Network.DownloadableGenomes.GenomeSpecies; /** * A simple class for listing the genomes available at the Babraham servers. * @author Piero Dalle Pezze */ public class BamQCListGenomes { private static Logger log = Logger.getLogger(BamQCListGenomes.class); /** * Return the list of genomes available on the Babraham server or null if this list cannot be downloaded. * Each species will be shown as follows: species [assembly1, assembly2, ... assemblyN]. * @param regex a regular expression string used as filter. (* is equivalent to unfiltered) * @return the list of available genomes or null */ public static GenomeSpecies[] listAvailableGenomes(String regex) { System.out.println("List of genomes (species [ assemblies ]) retrieved from the " + "Babraham Servers:"); GenomeSpecies[] gs = null; try { Pattern pattern = Pattern.compile(regex); Matcher matcher = null; String[] allSpeciesAssemblies = null; DownloadableGenomeSet dgs = new DownloadableGenomeSet(); gs = dgs.species(); if(gs.length > 0) { allSpeciesAssemblies = new String[gs.length]; // Store all species and assemblies in the array allSpeciesAssemblies for(int i=0;i<gs.length;i++) { allSpeciesAssemblies[i] = gs[i].name() + " [ "; GenomeAssembly[] ga = gs[i].assemblies(); for(int j=0;j<ga.length;j++) { allSpeciesAssemblies[i] += ga[j].assembly(); if(j < ga.length-1) { allSpeciesAssemblies[i] += " | "; } } allSpeciesAssemblies[i] += " ]"; } // Now print them using the matcher filter. for(int i=0; i<allSpeciesAssemblies.length; i++) { matcher = pattern.matcher(allSpeciesAssemblies[i]); if(matcher.find()) { System.out.println(allSpeciesAssemblies[i]); } } } else { System.out.println("Something went wrong. No species was retrieved from the " + "Babraham Servers. Is your Internet connection working?"); } } catch (IOException e) { log.error(e, e); } catch (PatternSyntaxException e) { log.error("The regular expression " + regex + " is not valid.", e); System.out.println("The regular expression " + regex + " is not valid."); } return gs; } /** * Return the list of downloaded genomes or null if this is empty * @return the list of downloaded genomes or null */ public static File[] listSavedGenomes() { File[] genomes = null; try { genomes = BamQCPreferences.getInstance().getGenomeBase().listFiles(); if (genomes == null) { throw new FileNotFoundException(); } } catch (FileNotFoundException e) { System.out.println("Could not find the folder containing your genomes. " + "Please check your file preferences."); return genomes; } System.out.println("Downloaded genomes:"); for(int i=0; i<genomes.length; i++) { System.out.println(genomes[i]); } return genomes; } /** * Converts a standard POSIX Shell globbing pattern into a regular expression * pattern. The result can be used with the standard {@link java.util.regex} API to * recognize strings which match the glob pattern. * <p/> * See also, the POSIX Shell language: * http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_01 * * @author Neil Traft (http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns) * @author Piero Dalle Pezze (small edits at the end). * * @param pattern A glob pattern. * @param matchWholeString true if the whole string is matched, false if only a substring is matched. * @return A regex pattern to recognize the given glob pattern. */ public static final String convertGlobToRegex(String pattern, boolean matchWholeString) { StringBuilder sb = new StringBuilder(pattern.length()); int inGroup = 0; int inClass = 0; int firstIndexInClass = -1; char[] arr = pattern.toCharArray(); for (int i = 0; i < arr.length; i++) { char ch = arr[i]; switch (ch) { case '\\': if (++i >= arr.length) { sb.append('\\'); } else { char next = arr[i]; switch (next) { case ',': // escape not needed break; case 'Q': case 'E': // extra escape needed sb.append('\\'); default: sb.append('\\'); } sb.append(next); } break; case '*': if (inClass == 0) sb.append(".*"); else sb.append('*'); break; case '?': if (inClass == 0) sb.append('.'); else sb.append('?'); break; case '[': inClass++; firstIndexInClass = i+1; sb.append('['); break; case ']': inClass--; sb.append(']'); break; case '.': case '(': case ')': case '+': case '|': case '^': case '$': case '@': case '%': if (inClass == 0 || (firstIndexInClass == i && ch == '^')) sb.append('\\'); sb.append(ch); break; case '!': if (firstIndexInClass == i) sb.append('^'); else sb.append('!'); break; case '{': inGroup++; sb.append('('); break; case '}': inGroup--; sb.append(')'); break; case ',': if (inGroup > 0) sb.append('|'); else sb.append(','); break; default: sb.append(ch); } } if(matchWholeString) { return "^" + sb.toString() + "$"; } return sb.toString(); } public static void main(String[] args) { Properties properties = System.getProperties(); if(properties.getProperty("bamqc.saved_genomes") != null && properties.getProperty("bamqc.saved_genomes").equals("true")) { BamQCListGenomes.listSavedGenomes(); } else if(properties.getProperty("bamqc.available_genomes") != null && properties.getProperty("bamqc.available_genomes").equals("true")) { // If it is preferred to match the whole string, then set matchWholeString to true. boolean matchWholeString = false; String pattern = "*", regex = ""; if(properties.getProperty("bamqc.genome_pattern") != null && !properties.getProperty("bamqc.genome_pattern").equals("")) { pattern = properties.getProperty("bamqc.genome_pattern"); } regex = convertGlobToRegex(pattern, matchWholeString); BamQCListGenomes.listAvailableGenomes(regex); } else { System.out.println("Please, use the option '--saved-genomes' or '--available-genomes'"); } } }