/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009,2010 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.ExecutionException;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import bio.pih.genoogle.index.ValueOutOfBoundsException;
import bio.pih.genoogle.interfaces.Console;
import bio.pih.genoogle.io.AbstractSequenceDataBank;
import bio.pih.genoogle.io.AbstractSimpleSequenceDataBank;
import bio.pih.genoogle.io.InvalidConfigurationException;
import bio.pih.genoogle.io.RemoteSimilaritySequenceDataBank;
import bio.pih.genoogle.io.Utils;
import bio.pih.genoogle.io.XMLConfigurationReader;
import bio.pih.genoogle.io.proto.Io.StoredSequence;
import bio.pih.genoogle.io.reader.ParseException;
import bio.pih.genoogle.search.SearchManager;
import bio.pih.genoogle.search.SearchParams;
import bio.pih.genoogle.search.SearchParams.Parameter;
import bio.pih.genoogle.search.UnknowDataBankException;
import bio.pih.genoogle.search.results.SearchResults;
import bio.pih.genoogle.seq.IllegalSymbolException;
import bio.pih.genoogle.seq.LightweightSymbolList;
import bio.pih.genoogle.seq.SymbolList;
import com.google.common.collect.Lists;
/**
 * The main class of Genoogle. To get a Genoogle instance, use the {@link #getInstance()} method.
 * <p>
 * The class is a process-wide singleton that wraps a {@link SearchManager} and exposes the
 * synchronous search entry points. It also hosts the command line {@link #main(String[])}.
 *
 * @author albrecht
 */
public final class Genoogle {

    /**
     * Cached Genoogle home directory, lazily resolved by {@link #getHome()}.
     * <p>
     * Declared before every other static field and without an explicit initializer on purpose:
     * {@link #CONF_LOG4J_PROPERTIES_FILE} calls {@link #getHome()} during class initialization,
     * and an explicit <code>= null</code> initializer placed later in the file would run
     * afterwards and throw the cached value away.
     */
    private static volatile File home;

    public static final File CONF_LOG4J_PROPERTIES_FILE = new File(getHome(), "conf/log4j.properties");

    /** Platform line separator, used to build multi-line messages. */
    public static final String line = System.getProperty("line.separator");

    public static final String SOFTWARE_NAME = "Genoogle";
    public static final Double VERSION = 0.98;
    public static final String AUTHOR = "Felipe Albrecht (felipe.albrecht@gmail.com).";
    public static final String WEB_PAGE = "http://genoogle.pih.bio.br";
    public static final String COPYRIGHT = "Copyright (C) 2008,2009,2010,2011,2012 Felipe Fernandes Albrecht";

    public static final String COPYRIGHT_NOTICE = line
            + "-----------------------------------------------------------------------------------------" + line
            + SOFTWARE_NAME + " Copyright (C) 2008, 2009, 2010, 2011 " + AUTHOR + line
            + "This program comes with ABSOLUTELY NO WARRANTY;" + line
            + "This is free software, and you are welcome to redistribute it under certain conditions;" + line
            + "See the LICENCE file or check at http://www.gnu.org/licenses/gpl-3.0.html for full license." + line
            + "-------------------------------------------------------------------------------------------";

    /** Search manager which executes every search requested through this class. */
    SearchManager sm = null;

    private static volatile Genoogle singleton = null;

    static Logger logger = Logger.getLogger(Genoogle.class.getName());

    /** Value passed to <code>encodeSequences</code> when running with the "-g" option. */
    private static final boolean forceFormatting = true;

    /**
     * Classes which use Genoogle and should be notified about changes.
     */
    private final List<GenoogleListener> listeners = Lists.newLinkedList();

    /**
     * Get the {@link Genoogle} execution instance, creating it at the first call.
     *
     * @return {@link Genoogle} singleton instance or <code>null</code> if an error did happen
     *         while creating it. The error is logged.
     */
    public synchronized static Genoogle getInstance() {
        if (singleton == null) {
            logger.info("Starting Genoogle .");
            try {
                singleton = new Genoogle();
            } catch (IOException e) {
                logger.fatal(e.getMessage(), e);
                return null;
            } catch (ValueOutOfBoundsException e) {
                logger.fatal(e.getMessage(), e);
                return null;
            } catch (InvalidConfigurationException e) {
                logger.fatal(e.getMessage(), e);
                return null;
            }
        }
        return singleton;
    }

    /**
     * Private constructor: configures log4j and obtains the {@link SearchManager} from the
     * {@link XMLConfigurationReader}.
     */
    private Genoogle() throws IOException, ValueOutOfBoundsException, InvalidConfigurationException {
        PropertyConfigurator.configure(CONF_LOG4J_PROPERTIES_FILE.getAbsolutePath());
        sm = XMLConfigurationReader.getSearchManager();
    }

    /**
     * Add a new listener to Genoogle which will be notified about changes.
     * <p>
     * Synchronized on the same lock as {@link #finish()}, which iterates over the listeners.
     *
     * @param listerner
     *            listener to be notified when Genoogle finishes.
     */
    public synchronized void addListerner(GenoogleListener listerner) {
        listeners.add(listerner);
    }

    /**
     * Finish {@link Genoogle}: notify the listeners to finish and then shut the
     * {@link SearchManager} down.
     */
    public synchronized void finish() {
        for (GenoogleListener listener : listeners) {
            listener.finish();
        }
        try {
            sm.shutdown();
        } catch (InterruptedException e) {
            // Log with the throwable so the stack trace is kept, and restore the interrupt
            // status so the caller can still see that the thread was interrupted.
            logger.fatal(e, e);
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Get the data bank name where the searches are performed when the data bank is not specified.
     *
     * @return Default data bank name
     */
    public String getDefaultDatabank() {
        return sm.getDefaultDataBankName();
    }

    /**
     * Get a {@link Collection} of all available data banks
     *
     * @return {@link Collection} of all {@link AbstractSequenceDataBank} which it is possible to
     *         execute a query.
     */
    public Collection<AbstractSequenceDataBank> getDatabanks() {
        return sm.getDatabanks();
    }

    /**
     * Do the search at the default data bank, reading the queries from the given
     * {@link BufferedReader} and returning only after all searches are finished.
     *
     * @param in
     *            {@link BufferedReader} where the sequences are read.
     * @return {@link List} of {@link SearchResults}, being one {@link SearchResults} for each input
     *         sequence inside the given {@link BufferedReader}.
     */
    public List<SearchResults> doBatchSyncSearch(BufferedReader in) throws IOException, UnknowDataBankException,
            InterruptedException, ExecutionException, NoSuchElementException, IllegalSymbolException, ParseException {
        String defaultDataBankName = sm.getDefaultDataBankName();
        return doBatchSyncSearch(in, defaultDataBankName);
    }

    /**
     * Do the search at the specified data bank, reading the queries from the given
     * {@link BufferedReader} and returning only after all searches are finished.
     *
     * @param in
     *            {@link BufferedReader} where the sequences are read.
     * @param databankName
     *            Data bank name where the search will be made.
     * @return {@link List} of {@link SearchResults}, being one {@link SearchResults} for each input
     *         sequence inside the given {@link BufferedReader}.
     */
    public List<SearchResults> doBatchSyncSearch(BufferedReader in, String databankName) throws IOException,
            UnknowDataBankException, InterruptedException, ExecutionException, NoSuchElementException,
            IllegalSymbolException, ParseException {
        return doBatchSyncSearch(in, databankName, null);
    }

    /**
     * Do the search at the specified data bank, reading the queries from the given
     * {@link BufferedReader}, using the specified {@link Map} of {@link Parameter} as parameters,
     * and returning only after all searches are finished.
     *
     * @param in
     *            {@link BufferedReader} where the sequences are read.
     * @param databankName
     *            Data bank name where the search will be made.
     * @param parameters
     *            {@link Map} of {@link Parameter} which will be used in these searches. The
     *            two-argument overload passes <code>null</code> here.
     *
     * @return {@link List} of {@link SearchResults}, being one {@link SearchResults} for each input
     *         sequence inside the given {@link BufferedReader}.
     */
    public List<SearchResults> doBatchSyncSearch(BufferedReader in, String databankName,
            Map<Parameter, Object> parameters) throws IOException, UnknowDataBankException, InterruptedException,
            ExecutionException, NoSuchElementException, IllegalSymbolException, ParseException {
        return sm.doSyncSearch(in, databankName, parameters);
    }

    /**
     * Do the search of the given sequence at the default data bank, returning only after the
     * search is finished.
     *
     * @param inputSequence
     *            input sequence for the searching.
     * @return A {@link SearchResults} containing the results of this search, or <code>null</code>
     *         if the search failed (the failure is logged).
     */
    public SearchResults doSyncSearch(String inputSequence) {
        String defaultDataBankName = sm.getDefaultDataBankName();
        return doSyncSearch(inputSequence, defaultDataBankName);
    }

    /**
     * Do the search of the given sequence at the informed data bank, returning only after the
     * search is finished.
     *
     * @param seqString
     *            input sequence for the searching. Leading and trailing whitespace is removed.
     * @param dataBankName
     *            data bank name where the search will be performed.
     *
     * @return A {@link SearchResults} containing the results of this search, or <code>null</code>
     *         if the search failed (the failure is logged).
     */
    public SearchResults doSyncSearch(String seqString, String dataBankName) {
        SearchResults sr = null;
        seqString = seqString.trim();
        try {
            SymbolList sequence = LightweightSymbolList.createDNA(seqString);
            SearchParams sp = new SearchParams(sequence, dataBankName);
            sr = sm.doSyncSearch(sp);
        } catch (UnknowDataBankException e) {
            logger.error(e.getMessage(), e);
        } catch (IllegalSymbolException e) {
            logger.error(e.getMessage(), e);
        } catch (InterruptedException e) {
            logger.error(e.getMessage(), e);
            // The exception is not rethrown, so restore the interrupt status for the caller.
            Thread.currentThread().interrupt();
        } catch (ExecutionException e) {
            logger.error(e.getMessage(), e);
        }
        return sr;
    }

    /**
     * Do the search of the given sequence at the informed data bank, using the specified
     * {@link Map} of {@link Parameter} as parameters, and returning only after the search is
     * finished.
     *
     * @param seqString
     *            input sequence for the searching. Leading and trailing whitespace is removed.
     * @param dataBankName
     *            data bank name where the search will be performed.
     * @param parameters
     *            {@link Map} of {@link Parameter} which will be used in this search.
     *
     * @return A {@link SearchResults} containing the results of this search, or <code>null</code>
     *         if the search failed (the failure is logged).
     */
    public SearchResults doSyncSearch(String seqString, String dataBankName, Map<Parameter, Object> parameters) {
        SearchResults sr = null;
        seqString = seqString.trim();
        try {
            SymbolList sequence = LightweightSymbolList.createDNA(seqString);
            SearchParams sp = new SearchParams(sequence, dataBankName, parameters);
            sr = sm.doSyncSearch(sp);
        } catch (UnknowDataBankException e) {
            logger.error(e.getMessage(), e);
        } catch (IllegalSymbolException e) {
            logger.error(e.getMessage(), e);
        } catch (InterruptedException e) {
            logger.error(e.getMessage(), e);
            // The exception is not rethrown, so restore the interrupt status for the caller.
            Thread.currentThread().interrupt();
        } catch (ExecutionException e) {
            logger.error(e.getMessage(), e);
        }
        return sr;
    }

    /**
     * Get the decoded sequence stored under the given id at the given data bank.
     * <p>
     * Only data banks which are a {@link RemoteSimilaritySequenceDataBank} are supported.
     *
     * @param db
     *            name of the data bank where the sequence is stored.
     * @param id
     *            id of the sequence inside the data bank.
     * @return the sequence decoded to a {@link String} by the data bank's encoder.
     * @throws RuntimeException
     *             if the data bank is not found, if it is not a
     *             {@link RemoteSimilaritySequenceDataBank}, or wrapping the {@link IOException}
     *             raised while reading the sequence.
     */
    public String getSequence(String db, int id) {
        AbstractSequenceDataBank databank = sm.getDatabank(db);
        // NOTE(review): assumes getDatabank may return null for an unknown name - confirm.
        // Without this check a missing data bank was reported as "is a collection".
        if (databank == null) {
            throw new RuntimeException("Databank " + db + " was not found.");
        }
        if (!(databank instanceof RemoteSimilaritySequenceDataBank)) {
            throw new RuntimeException("Databank " + db + " is a collection. Get sequence is not supported. Yet.");
        }

        RemoteSimilaritySequenceDataBank remoteDataBank = (RemoteSimilaritySequenceDataBank) databank;
        try {
            StoredSequence sequence = remoteDataBank.getSequenceFromId(id);
            int[] encodedDatabankSequence = Utils.getEncodedSequenceAsArray(sequence);
            return remoteDataBank.getAaEncoder().decodeIntegerArrayToString(encodedDatabankSequence);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Main method: Use the "-g" option to encode and create inverted index for the data banks or
     * "-b file" to execute the commands specified at the file or do not use parameters and use the
     * console. "-h", or any unrecognized option, shows the help.
     */
    public static void main(String[] args) throws IOException, ValueOutOfBoundsException, InvalidConfigurationException {
        PropertyConfigurator.configure(CONF_LOG4J_PROPERTIES_FILE.getAbsolutePath());
        logger.info(COPYRIGHT_NOTICE);
        System.err.println(getHome());

        List<AbstractSequenceDataBank> dataBanks = XMLConfigurationReader.getDataBanks();

        if (args.length == 0) {
            Console console = new Console();
            new Thread(console).start();
            return;
        }

        String option = args[0];
        System.out.println("Options: " + option);

        // One single if/else-if chain: previously "-h" was tested by a stand-alone "if" and then
        // fell into the final "else" of the following chain, showing the help twice.
        if (option.equals("-h")) {
            showHelp();
        } else if (option.equals("-g")) {
            logger.info("Searching for non encoded data banks.");
            for (AbstractSequenceDataBank dataBank : dataBanks) {
                if (!dataBank.check()) {
                    dataBank.delete();
                    logger.info("Data bank " + dataBank.getName() + " is not encoded.");
                    try {
                        dataBank.encodeSequences(forceFormatting);
                    } catch (Exception e) {
                        logger.fatal(e, e);
                        return;
                    }
                }
            }
            logger.info("All specified data banks are encoded. You can do yours searchs now.");
        } else if (args.length >= 2 && option.equals("-b")) {
            String inputFile = args[1];
            Console console = new Console(new File(inputFile));
            new Thread(console).start();
        } else {
            showHelp();
        }
    }

    /**
     * Get the Genoogle home directory: the directory informed by the <code>GENOOGLE_HOME</code>
     * environment variable or, when it is not set, the current directory (".").
     *
     * @return the Genoogle home directory, never <code>null</code>.
     */
    public static File getHome() {
        // Read the volatile field once; concurrent first calls compute the same value.
        File result = home;
        if (result == null) {
            String homeEnv = System.getenv("GENOOGLE_HOME");
            if (homeEnv != null) {
                result = new File(homeEnv);
            } else {
                result = new File(".");
            }
            home = result;
        }
        return result;
    }

    /**
     * Log the command line options accepted by {@link #main(String[])}.
     */
    private static void showHelp() {
        logger.info("Options for Genoogle console mode execution:");
        logger.info(" -h : this help.");
        logger.info(" -g : encode all not encoded databanks specified at conf/genoogle.conf .");
        logger.info(" -b <BATCH_FILE> : starts genoogle and execute the <BATCH_FILE> .");
    }
}