/*
 * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
 * Copyright (C) 2008,2009  Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
 *
 * For further information check the LICENSE file.
 */

package bio.pih.genoogle.io;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import bio.pih.genoogle.Genoogle;
import bio.pih.genoogle.index.ValueOutOfBoundsException;
import bio.pih.genoogle.search.SearchManager;
import bio.pih.genoogle.seq.Alphabet;
import bio.pih.genoogle.seq.AminoAcidAlphabet;
import bio.pih.genoogle.seq.DNAAlphabet;
import bio.pih.genoogle.seq.RNAAlphabet;

import com.google.common.collect.Lists;

/**
 * Read and execute the configuration from the XML file.
 *
 * The file is <code>conf/genoogle.xml</code> below the Genoogle home directory
 * and is parsed once, when this class is initialized. All accessors are static
 * and read from that single parsed document.
 *
 * @author albrecht
 */
public class XMLConfigurationReader {

    private static final Logger logger = Logger.getLogger(XMLConfigurationReader.class.getCanonicalName());

    private static final File confFile = new File(Genoogle.getHome(), "conf" + File.separator + "genoogle.xml");

    // Stays null when the file could not be parsed; see getRootElement().
    private static final Document doc = readConfiguration();

    // Lazily filled caches for the two score values.
    private static Integer match = null;
    private static Integer mismatch = null;

    /**
     * Parses the configuration file.
     *
     * @return the parsed document, or <code>null</code> when reading failed
     *         (the failure is logged together with its stack trace).
     */
    private static Document readConfiguration() {
        try {
            return new SAXReader().read(confFile);
        } catch (Exception e) {
            // Pass the exception as the Throwable argument so the stack trace is kept.
            logger.fatal("Error reading the configuration at " + confFile + ".", e);
            return null;
        }
    }

    /**
     * @return the root element of the configuration document.
     * @throws IllegalStateException
     *             if the configuration file could not be read at class
     *             initialization.
     */
    private static Element getRootElement() {
        if (doc == null) {
            throw new IllegalStateException("The configuration file " + confFile + " could not be read.");
        }
        return doc.getRootElement();
    }

    /**
     * @return a brand new {@link SearchManager} with the parameters read from
     *         genoogle.xml and with its data banks. Data banks whose
     *         <code>load()</code> returns <code>false</code> are logged and
     *         left out.
     * @throws InvalidConfigurationException
     *             if a data bank definition is invalid or if the data banks
     *             could not be read at all.
     */
    public static SearchManager getSearchManager() throws IOException, ValueOutOfBoundsException,
            InvalidConfigurationException {
        Element searchManagerElement = getRootElement().element("search-manager");
        SearchManager searchManager = new SearchManager(getMaxSimultaneousSearchs(searchManagerElement));

        List<AbstractSequenceDataBank> dataBanks = XMLConfigurationReader.getDataBanks();
        if (dataBanks == null) {
            // getDataBanks() signals "nothing usable" with null; fail with a
            // meaningful error instead of a NullPointerException in the loop.
            throw new InvalidConfigurationException("It was not possible to read the data banks from "
                    + confFile + ".");
        }

        for (AbstractSequenceDataBank dataBank : dataBanks) {
            if (dataBank.load()) {
                searchManager.addDatabank(dataBank);
            } else {
                logger.fatal("It was not possible to load the data bank \"" + dataBank.getName() + "\".");
            }
        }
        return searchManager;
    }

    /**
     * @param searchManager
     *            the <code>search-manager</code> configuration element.
     * @return how many simultaneous searchs a searchManager can handle.
     */
    private static int getMaxSimultaneousSearchs(Element searchManager) {
        Element maxSimultaneousSearchs = searchManager.element("max-simultaneous-searchs");
        String value = maxSimultaneousSearchs.attributeValue("value");
        return Integer.parseInt(value);
    }

    /**
     * Reads the integer <code>value</code> attribute of a child of the
     * <code>score</code> element.
     */
    private static int readScore(String elementName) {
        Element scoreElement = getRootElement().element("score");
        String value = scoreElement.element(elementName).attributeValue("value");
        return Integer.parseInt(value);
    }

    /**
     * @return the match score from the configuration. The value is read on the
     *         first call and cached afterwards.
     */
    public static int getMatchScore() {
        if (match == null) {
            match = Integer.valueOf(readScore("match"));
        }
        return match.intValue();
    }

    /**
     * @return the mismatch score from the configuration. The value is read on
     *         the first call and cached afterwards.
     */
    public static int getMismatchScore() {
        if (mismatch == null) {
            mismatch = Integer.valueOf(readScore("mismatch"));
        }
        return mismatch.intValue();
    }

    /**
     * @return {@link List} of {@link AbstractSequenceDataBank} that are
     *         configured in the XML file, or <code>null</code> when the file
     *         has no <code>databanks</code> element or when any of the
     *         configured data banks could not be created.
     */
    public static List<AbstractSequenceDataBank> getDataBanks() throws IOException, InvalidConfigurationException {
        Element databanks = getRootElement().element("databanks");
        if (databanks == null) {
            return null;
        }

        List<AbstractSequenceDataBank> sequenceDataBanks = Lists.newLinkedList();
        // The iterator yields the child elements of <databanks>.
        Iterator<?> databankIterator = databanks.elementIterator();
        while (databankIterator.hasNext()) {
            AbstractSequenceDataBank databank = getDatabank((Element) databankIterator.next(), null);
            if (databank == null) {
                return null;
            }
            sequenceDataBanks.add(databank);
        }
        return sequenceDataBanks;
    }

    /**
     * Creates the data bank described by one configuration element.
     *
     * @param e
     *            a <code>databank</code> or <code>split-databanks</code>
     *            element.
     * @param parent
     *            the collection that contains this data bank, or
     *            <code>null</code> for a top level data bank. When given, the
     *            sub-sequence length is taken from the parent, and so is the
     *            alphabet when the element has no <code>type</code> attribute.
     * @return the data bank, or <code>null</code> when the element name is
     *         unknown or the data bank could not be created (the reason is
     *         logged).
     * @throws InvalidConfigurationException
     *             if a required attribute is missing or has an invalid value.
     */
    private static AbstractSequenceDataBank getDatabank(Element e,
            AbstractDatabankCollection<? extends AbstractSimpleSequenceDataBank> parent) throws IOException,
            InvalidConfigurationException {
        String name = e.attributeValue("name");
        String rawPath = e.attributeValue("path");
        String mask = e.attributeValue("mask");
        String lowComplexityFilterString = e.attributeValue("low-complexity-filter");
        String type = e.attributeValue("type");
        String remoteSimilarity = e.attributeValue("remoteSimilarity");
        String subSequenceLengthString = e.attributeValue("sub-sequence-length");

        // Validate the required attributes before anything dereferences them.
        if (name == null) {
            throw new InvalidConfigurationException("Missing attribute name in element " + e.getName());
        }
        if (rawPath == null) {
            throw new InvalidConfigurationException("Missing attribute path in element " + e.getName());
        }
        String path = readPath(rawPath);

        if (path.equals(name)) {
            throw new InvalidConfigurationException("It is not possible to have a FASTA file (" + path
                    + ") with the same name (" + name + ") of the data base.");
        }
        if ((parent != null) && path.equals(parent.getName())) {
            throw new InvalidConfigurationException("It is not possible to have a FASTA (" + path
                    + ") file with the same name (" + parent.getName() + ") of the its parent data bank.");
        }

        boolean remoteSimilarityDatabank = false;
        if (remoteSimilarity != null) {
            remoteSimilarityDatabank = Boolean.parseBoolean(remoteSimilarity);
        }

        int subSequenceLength;
        if (parent != null) {
            subSequenceLength = parent.getSubSequenceLength();
        } else if (subSequenceLengthString != null) {
            subSequenceLength = Integer.parseInt(subSequenceLengthString);
        } else {
            throw new InvalidConfigurationException("Missing attribute sub-sequence-length in element "
                    + e.getName());
        }

        // -1 is the value used when no low-complexity-filter attribute is present.
        int lowComplexityFilter = -1;
        if (lowComplexityFilterString != null) {
            lowComplexityFilter = Integer.parseInt(lowComplexityFilterString);
        }

        Alphabet alphabet = DNAAlphabet.SINGLETON;
        if (type == null) {
            if (parent != null) {
                alphabet = parent.getAlphabet();
            }
        } else if ("dna".equalsIgnoreCase(type)) {
            // equalsIgnoreCase does not depend on the default locale,
            // unlike toLowerCase().
            alphabet = DNAAlphabet.SINGLETON;
        } else if ("rna".equalsIgnoreCase(type)) {
            alphabet = RNAAlphabet.SINGLETON;
        } else if ("protein".equalsIgnoreCase(type)) {
            alphabet = AminoAcidAlphabet.SINGLETON;
        } else {
            throw new InvalidConfigurationException("Sequences type: " + type + " is invalid.");
        }

        String elementName = e.getName().trim();
        if (elementName.equals("split-databanks")) {
            return readSplitDatabanks(e, name, alphabet, path, subSequenceLength, mask, lowComplexityFilter);
        }
        if (elementName.equals("databank")) {
            return readSingleDatabank(name, alphabet, path, subSequenceLength, mask, remoteSimilarityDatabank,
                    parent);
        }

        logger.error("Unknow element name " + e.getName());
        return null;
    }

    /**
     * Creates a {@link SplittedDatabankCollection} and adds every child
     * element of <code>e</code> to it as a sub data bank.
     *
     * @return the collection, or <code>null</code> when a child could not be
     *         created or when two children have the same name.
     * @throws InvalidConfigurationException
     *             if the <code>number-of-sub-databanks</code> attribute is
     *             missing or a child definition is invalid.
     */
    private static AbstractSequenceDataBank readSplitDatabanks(Element e, String name, Alphabet alphabet,
            String path, int subSequenceLength, String mask, int lowComplexityFilter) throws IOException,
            InvalidConfigurationException {
        String numberOfSubDatabanksAttr = e.attributeValue("number-of-sub-databanks");
        if (numberOfSubDatabanksAttr == null) {
            throw new InvalidConfigurationException("Missing atribute 'number-of-sub-databanks' in the databank "
                    + name);
        }
        int size = Integer.parseInt(numberOfSubDatabanksAttr);

        SplittedDatabankCollection splittedSequenceDatabank = new SplittedDatabankCollection(name, alphabet,
                new File(Genoogle.getHome(), path), subSequenceLength, size, mask);
        splittedSequenceDatabank.setLowComplexityFilter(lowComplexityFilter);

        Iterator<?> databankIterator = e.elementIterator();
        while (databankIterator.hasNext()) {
            try {
                IndexedSequenceDataBank databank = (IndexedSequenceDataBank) getDatabank(
                        (Element) databankIterator.next(), splittedSequenceDatabank);
                if (databank == null) {
                    return null;
                }
                splittedSequenceDatabank.addDatabank(databank);
            } catch (DuplicateDatabankException e1) {
                // Report the duplicated name together with the collection it was found in.
                logger.fatal("Duplicate databanks named " + e1.getDatabankName() + " defined in " + name, e1);
                return null;
            }
        }
        return splittedSequenceDatabank;
    }

    /**
     * Creates the data bank for a <code>databank</code> element: a
     * {@link RemoteSimilaritySequenceDataBank} when
     * <code>remoteSimilarityDatabank</code> is set, otherwise an
     * {@link IndexedSequenceDataBank}.
     *
     * @return the data bank, or <code>null</code> when its creation failed
     *         with a {@link ValueOutOfBoundsException} (which is logged).
     */
    private static AbstractSequenceDataBank readSingleDatabank(String name, Alphabet alphabet, String path,
            int subSequenceLength, String mask, boolean remoteSimilarityDatabank,
            AbstractDatabankCollection<? extends AbstractSimpleSequenceDataBank> parent) throws IOException,
            InvalidConfigurationException {
        File file = new File(path);
        try {
            if (remoteSimilarityDatabank) {
                return new RemoteSimilaritySequenceDataBank(name, alphabet, subSequenceLength, file, parent);
            }
            return new IndexedSequenceDataBank(name, alphabet, subSequenceLength, mask, file, parent);
        } catch (ValueOutOfBoundsException e1) {
            logger.fatal("Error creating IndexedDNASequenceDataBank.", e1);
        }
        return null;
    }

    private static Element getSearchParameters() {
        return getRootElement().element("search-parameters");
    }

    /**
     * Reads the integer <code>value</code> attribute of a child of the
     * <code>search-parameters</code> element.
     */
    private static int getSearchParameterAsInt(String elementName) {
        String value = getSearchParameters().element(elementName).attributeValue("value");
        return Integer.parseInt(value);
    }

    /**
     * @return max SubSequence distance
     */
    public static int getMaxSubSequenceDistance() {
        return getSearchParameterAsInt("max-sub-sequence-distance");
    }

    /**
     * @return default extended drop off specified at the XML configuration
     *         file.
     */
    public static int getExtendDropoff() {
        return getSearchParameterAsInt("extend-dropoff");
    }

    /**
     * @return default minimum length of a HSP to be kept to the next searching
     *         phase.
     */
    public static int getMinHspLength() {
        return getSearchParameterAsInt("min-hsp-length");
    }

    /**
     * @return how many Hits results.
     */
    public static int getMaxResults() {
        return getSearchParameterAsInt("max-hits-results");
    }

    /**
     * @return Max number of threads that will be used to search sub-sequences
     *         at the index.
     */
    public static int getMaxThreadsIndexSearch() {
        return getSearchParameterAsInt("max-threads-index-search");
    }

    /**
     * @return Max number of threads that will be used to extend and align the
     *         HSP.
     */
    public static int getMaxThreadsExtendAlign() {
        return getSearchParameterAsInt("max-threads-extend-align");
    }

    /**
     * @return minimum size of each input query slice.
     */
    public static int getMinQuerySliceLength() {
        return getSearchParameterAsInt("min-query-slice-length");
    }

    /**
     * @return how many slices the input query will be divided.
     */
    public static int getQuerySplitQuantity() {
        return getSearchParameterAsInt("query-split-quantity");
    }

    /**
     * Converts the <code>/</code> separators of a configured path to the
     * separator of the running platform.
     *
     * @param path
     *            the path as written in the configuration; must not be
     *            <code>null</code>.
     */
    private static String readPath(String path) {
        return path.replace('/', File.separatorChar);
    }

    private static Element getWebService() {
        return getRootElement().element("web-service");
    }

    /**
     * @return the <code>value</code> attribute of the web service
     *         <code>server-address</code> element.
     */
    public static String getWebServiceAddress() {
        return getWebService().element("server-address").attributeValue("value");
    }

    /**
     * @return the <code>value</code> attribute of the web service
     *         <code>use-sessions</code> element, parsed as a boolean.
     */
    public static Boolean useSessions() {
        String value = getWebService().element("use-sessions").attributeValue("value");
        return Boolean.parseBoolean(value);
    }
}