package org.genedb.db.loading;
import org.gmod.schema.feature.Chromosome;
import org.gmod.schema.feature.Contig;
import org.gmod.schema.feature.Supercontig;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;
import java.util.Locale;
/**
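* Command-line entry point for loading AGP files. Reads the loader options from system
* properties, validates them, configures an AGPLoader accordingly and calls its load
* method for each input file that is processed.
*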
* @author nds
* @contact path-help@sanger.ac.uk
*/
public class LoadAGP extends FileProcessor {

    private static final Logger logger = Logger.getLogger(LoadAGP.class);

    /** Loader bean obtained from the Spring context and configured by the constructor. */
    private final AGPLoader loader;

    /**
     * Takes no command-line arguments, but expects to find the following system properties that
     * determine how the loader will function.
     * <ul>
     * <li> <code>load.organismCommonName</code> (required)</li>
     * <li> <code>load.mode</code> can be set to <b>1</b> (create a toplevel feature, and add contig &amp; gap
     *      locations on it) or <b>2</b> (create contig and gap features based on an existing toplevel
     *      feature). Default is 1.</li>
     * <li> <code>load.topLevel</code> tells the loader if the top level feature(s) we need to deal with for
     *      this organism is a chromosome or supercontig. The default is supercontig.</li>
     * <li> <code>load.childLevel</code> tells the loader what the child level features are (usually contigs,
     *      occasionally supercontigs when they are assembled into chromosomes). The default is contig.
     *      It must differ from <code>load.topLevel</code>.</li>
     * <li> <code>load.createMissingContigs</code> (yes/no) tells the loader if it should create missing
     *      contigs in mode 1 where, in theory, all the contigs should already be in the database. In some
     *      cases, however, like Tcongolense it is ok to create contigs when they cannot be found as there
     *      are no contig features in the database for it anyway. Default is no.</li>
     * <li> <code>load.putUnusedContigsInBin</code> (yes/no) tells the loader if it should put any unused
     *      child features (in mode 1) in the bin. If set to yes, it will look for a toplevel feature of the
     *      type specified with a name like '%bin%'. Default is no.</li>
     * <li> <code>load.inputDirectory</code> (required) the file or directory handed to
     *      <code>processFileOrDirectory</code>.</li>
     * <li> <code>load.fileNamePattern</code> regular expression selecting the files to load. Default is
     *      {@code .*\.(agp)(?:\.gz)?}.</li>
     * </ul>
     *
     * <p>The actual loading is dealt with by AGPLoader.
     * This can be called from the command line using
     *
     * <pre>
     * ant load-agp -Dconfig=localcopy -Dorganism=Tcongolense -Dload.mode=1 -Dload.topLevel=chromosome -Dfile=Tcongolense.agp
     * </pre>
     *
     * @param args ignored (a warning is logged if any are supplied)
     * @throws MissingPropertyException if a required system property (as detailed above) is missing
     * @throws ParsingException if an AGP file cannot be parsed
     * @throws IOException if there's a problem opening or reading the file
     * @throws SQLException declared on this entry point; presumably propagated from the loading code - confirm
     * @throws IllegalArgumentException if one of the option values is not recognised
     */
    public static void main(String[] args) throws MissingPropertyException, IOException, ParsingException, SQLException {
        if (args.length > 0) {
            logger.warn("Ignoring command-line arguments");
        }

        String organismCommonName = getRequiredProperty("load.organismCommonName");
        String mode = getPropertyWithDefault("load.mode", "1");
        // Locale.ROOT keeps the normalisation independent of the JVM's default locale
        // (e.g. a Turkish locale would otherwise turn "CONTIG" into "cont\u0131g").
        String topLevelFeatureType = getPropertyWithDefault("load.topLevel", "supercontig").toLowerCase(Locale.ROOT);
        String childLevelFeatureType = getPropertyWithDefault("load.childLevel", "contig").toLowerCase(Locale.ROOT);
        String createMissingContigs = getPropertyWithDefault("load.createMissingContigs", "no");
        String fileNamePattern = getPropertyWithDefault("load.fileNamePattern", ".*\\.(agp)(?:\\.gz)?");
        String putUnusedContigsInBin = getPropertyWithDefault("load.putUnusedContigsInBin", "no");
        String inputDirectory = getRequiredProperty("load.inputDirectory");

        // One placeholder per argument: previously childLevel was printed under the inputDirectory label.
        logger.info(String.format(
                "Options: organismCommonName=%s, mode=%s, topLevel=%s, childLevel=%s, inputDirectory=%s",
                organismCommonName, mode, topLevelFeatureType, childLevelFeatureType, inputDirectory));

        LoadAGP loadAGP = new LoadAGP(organismCommonName, mode, topLevelFeatureType, childLevelFeatureType,
                createMissingContigs, putUnusedContigsInBin);
        loadAGP.processFileOrDirectory(inputDirectory, fileNamePattern);
    }

    /**
     * Constructor. Gets the <code>agpLoader</code> bean from the <code>Load.xml</code> application context,
     * validates the arguments and configures the loader with them. No file is loaded here; that happens
     * in {@link #processFile(File, Reader)}.
     *
     * @param organismCommonName common name of the organism, passed to the loader as is
     * @param mode either "1" or "2"
     * @param topLevelFeatureType either "chromosome" or "supercontig" (lower case)
     * @param childLevelFeatureType either "contig" or "supercontig" (lower case); must differ from the top level type
     * @param createMissingContigs "yes" or "no" (case-insensitive)
     * @param putUnusedContigsInBin "yes" or "no" (case-insensitive)
     * @throws IOException declared for callers; not thrown directly by the code in this constructor
     * @throws IllegalArgumentException if any of the values is not one of those listed above
     */
    private LoadAGP(String organismCommonName, String mode, String topLevelFeatureType, String childLevelFeatureType, String createMissingContigs, String putUnusedContigsInBin) throws IOException {
        ApplicationContext applicationContext = new ClassPathXmlApplicationContext(new String[] {"Load.xml"});
        this.loader = applicationContext.getBean("agpLoader", AGPLoader.class);

        // Organism name
        loader.setOrganismCommonName(organismCommonName);

        // Mode
        if (!mode.equals("1") && !mode.equals("2")) {
            throw new IllegalArgumentException(String.format("Unrecognised value for load.mode: %s", mode));
        }
        loader.setMode(mode);

        // Top level: only chromosome and supercontig are allowed at the moment. Can add plasmid etc later on.
        if (topLevelFeatureType.equals("chromosome")) {
            loader.setTopLevelFeatureClass(Chromosome.class);
        } else if (topLevelFeatureType.equals("supercontig")) {
            loader.setTopLevelFeatureClass(Supercontig.class);
        } else {
            throw new IllegalArgumentException(String.format("Unrecognised value for load.topLevel: %s", topLevelFeatureType));
        }

        // Child level: only contig or supercontig for now.
        if (childLevelFeatureType.equals("contig")) {
            loader.setChildLevelFeatureClass(Contig.class);
        } else if (childLevelFeatureType.equals("supercontig")) {
            loader.setChildLevelFeatureClass(Supercontig.class);
        } else {
            throw new IllegalArgumentException(String.format("Unrecognised value for load.childLevel: %s", childLevelFeatureType));
        }

        // Both levels accept "supercontig", so make sure they were not set to the same type of feature.
        if (childLevelFeatureType.equals(topLevelFeatureType)) {
            throw new IllegalArgumentException(String.format("Both the child level and the top level feature types are set to: %s", childLevelFeatureType));
        }

        // Should the loader create missing contigs
        loader.setCreateMissingContigs(requireYesOrNo("load.createMissingContigs", createMissingContigs));

        // Should the loader put any unused contigs in the bin.
        // This used to call setCreateMissingContigs a second time, so the option never reached the loader.
        // NOTE(review): assumes AGPLoader exposes setPutUnusedContigsInBin(String), mirroring
        // setCreateMissingContigs(String) - confirm against AGPLoader.
        loader.setPutUnusedContigsInBin(requireYesOrNo("load.putUnusedContigsInBin", putUnusedContigsInBin));
    }

    /**
     * Checks that a yes/no option holds "yes" or "no" (case-insensitive).
     *
     * @param propertyName name of the system property, used in the error message
     * @param value the value to check
     * @return <code>value</code>, unchanged
     * @throws IllegalArgumentException if the value is neither "yes" nor "no"
     */
    private static String requireYesOrNo(String propertyName, String value) {
        if (value.equalsIgnoreCase("yes") || value.equalsIgnoreCase("no")) {
            return value;
        }
        throw new IllegalArgumentException(String.format("Unrecognised value for %s: %s", propertyName, value));
    }

    /**
     * Wraps the reader in a BufferedReader, builds an AGPFile from it and hands that to the loader.
     *
     * @param inputFile the file being processed (not used by this implementation)
     * @param reader reader over the contents of the AGP file
     * @throws IOException if there's a problem reading the file
     * @throws ParsingException if the AGP file cannot be parsed
     */
    @Override
    protected void processFile(File inputFile, Reader reader) throws IOException, ParsingException {
        loader.load(new AGPFile(new BufferedReader(reader)));
    }
}