package org.genedb.crawl.elasticsearch.index.sql;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import org.apache.ibatis.io.Resources;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;
import org.apache.log4j.Logger;
import org.codehaus.jackson.type.TypeReference;
import org.genedb.crawl.CrawlException;
import org.genedb.crawl.elasticsearch.index.IndexBuilder;
import org.genedb.crawl.elasticsearch.mappers.ElasticSearchFeatureMapper;
import org.genedb.crawl.elasticsearch.mappers.ElasticSearchOrganismsMapper;
import org.genedb.crawl.elasticsearch.mappers.ElasticSearchRegionsMapper;
import org.genedb.crawl.json.JsonIzer;
import org.genedb.crawl.mappers.AuditMapper;
import org.genedb.crawl.mappers.FeatureMapper;
import org.genedb.crawl.mappers.FeaturesMapper;
import org.genedb.crawl.mappers.OrganismsMapper;
import org.genedb.crawl.mappers.RegionsMapper;
import org.genedb.crawl.mappers.TermsMapper;
import org.genedb.crawl.model.Cvterm;
import org.genedb.crawl.model.Organism;
import org.kohsuke.args4j.Option;

import com.hazelcast.core.Hazelcast;

/**
 * Command-line index builder that reads organisms, regions and features through
 * MyBatis SQL mappers and hands them to a {@link SQLIndexer}, which writes them via
 * the ElasticSearch mappers.
 *
 * <p>What gets indexed is selected by the command-line options, in this order of
 * precedence: a region ({@code -r}), a "since" date ({@code -s}), and finally an
 * organism ({@code -o}) or all organisms. The {@code -f} flag switches between
 * indexing the entity itself and indexing its contents.
 */
public class IncrementalSQLIndexBuilder extends IndexBuilder {

    private static final Logger logger = Logger.getLogger(IncrementalSQLIndexBuilder.class);

    /** Date pattern accepted by the {@code --since} option. */
    private static final String SINCE_DATE_PATTERN = "yyyy-MM-dd";

    /** Feature types used when {@code --types} is not supplied (a JSON array of strings). */
    private static final String defaultTypes =
            "[\"gene\", \"pseudogene\", \"match_part\", \"repeat_region\", \"repeat_unit\", "
            + "\"direct_repeat\", \"EST_match\", \"region\", \"polypeptide\", \"mRNA\", "
            + "\"pseudogenic_transcript\", \"nucleotide_match\", \"exon\", \"pseudogenic_exon\", "
            + "\"gap\", \"contig\", \"ncRNA\", \"tRNA\", \"five_prime_UTR\", \"three_prime_UTR\", "
            + "\"polypeptide_motif\"]";

    /** Classpath resource holding the MyBatis configuration. */
    private static final String resource = "ibatis-datasourced.xml";

    protected OrganismsMapper organismMapper;
    protected FeaturesMapper featuresMapper;
    protected FeatureMapper featureMapper;
    protected RegionsMapper regionsMapper;
    protected TermsMapper termsMapper;
    protected AuditMapper auditMapper;

    @Option(name = "-s", aliases = {"--since"}, usage = "The date formatted as yyyy-MM-dd", required = false)
    public String since;

    @Option(name = "-o", aliases = {"--organism"}, usage = "The organism common name", required = false)
    public String organismCommonName;

    @Option(name = "-r", aliases = {"--region"}, usage = "The region name", required = false)
    public String region;

    @Option(name = "-f", aliases = {"--features"}, usage = "Index the i.e. features of the supplied organism or region", required = false)
    public boolean features = false;

    @Option(name = "-pc", aliases = {"--properties_chado"}, usage = "A properties file specifying SQL connection details", required=true)
    public File chadoPropertiesFile;

    @Option(name = "-e", aliases = {"--exclude"}, usage = "Whether to exclude or include the supplied types", required=false)
    public boolean exclude = false;

    @Option(name = "-t", aliases = {"--types"}, usage = "The types to include or exclude, supplied as a JSON ['array', 'of', 'strings'].", required=false)
    public String types = defaultTypes;

    private Properties chadoProperties;
    private SqlSessionFactory sqlMapper = null;
    private SqlSession session;

    private ElasticSearchOrganismsMapper esOrganismMapper;
    private ElasticSearchFeatureMapper esFeatureMapper;
    private ElasticSearchRegionsMapper esRegionsMapper;

    private List<Cvterm> relationships = new ArrayList<Cvterm>();
    private JsonIzer jsonIzer = new JsonIzer();

    /**
     * Sets up the index and the SQL session, wires a {@link SQLIndexer} and runs the
     * indexing operation selected by the command-line options.
     *
     * <p>NOTE(review): this method opens the SQL session but does not call
     * {@link #closeSession()}; presumably the caller is responsible for that - confirm.
     *
     * @throws CrawlException declared for the indexing calls made here
     * @throws ParseException if {@code since} is set but is not a valid {@code yyyy-MM-dd} date
     * @throws IOException if the chado properties file or the MyBatis configuration cannot be read
     * @throws IllegalArgumentException if an organism name was supplied but no such organism was found
     * @throws RuntimeException if asked to index the contents of every organism at once
     */
    public void run() throws CrawlException, ParseException, IOException {

        setupIndex();
        setupSession();

        initSqlMappers();
        initElasticSearchMappers();

        SQLIndexer indexer = buildIndexer();
        Organism o = resolveOrganism();

        if (region != null) {
            if (features) {
                indexer.indexRegionContents(region);
            } else {
                indexer.indexRegion(region);
            }
        } else if (since != null) {
            indexer.indexFeaturesSince(getDate(since), o);
        } else {
            indexOrganisms(indexer, o);
        }
    }

    /** Obtains the SQL mappers from the open session and loads the relationship terms. */
    private void initSqlMappers() {
        // SQL mappers
        organismMapper = session.getMapper(OrganismsMapper.class);
        featuresMapper = session.getMapper(FeaturesMapper.class);
        featureMapper = session.getMapper(FeatureMapper.class);
        regionsMapper = session.getMapper(RegionsMapper.class);
        termsMapper = session.getMapper(TermsMapper.class);
        auditMapper = session.getMapper(AuditMapper.class);

        relationships.add(CvtermUtil.makeTerm(termsMapper, "derives_from", "sequence"));
        relationships.add(CvtermUtil.makeTerm(termsMapper, "part_of", "relationship"));
    }

    /** Creates the ElasticSearch mappers and attaches them to the inherited connection. */
    private void initElasticSearchMappers() {
        // ES mappers
        esOrganismMapper = new ElasticSearchOrganismsMapper();
        esOrganismMapper.setConnection(connection);

        esFeatureMapper = new ElasticSearchFeatureMapper();
        esFeatureMapper.setConnection(connection);

        esRegionsMapper = new ElasticSearchRegionsMapper();
        esRegionsMapper.setConnection(connection);
    }

    /** Builds a {@link SQLIndexer} wired with every mapper and the type filter options. */
    private SQLIndexer buildIndexer() throws IOException {
        SQLIndexer indexer = new SQLIndexer();

        indexer.featureMapper = featureMapper;
        indexer.featuresMapper = featuresMapper;
        indexer.regionsMapper = regionsMapper;
        indexer.organismMapper = organismMapper;
        indexer.termsMapper = termsMapper;
        indexer.auditMapper = auditMapper;

        indexer.esFeatureMapper = esFeatureMapper;
        indexer.esOrganismMapper = esOrganismMapper;
        indexer.esRegionsMapper = esRegionsMapper;

        indexer.relationships = relationships;
        indexer.exclude = exclude;

        logger.debug("Setting types : " + types);

        // The target type is fixed by the TypeReference, so the cast is safe.
        @SuppressWarnings("unchecked")
        List<String> parsedTypes = (List<String>) jsonIzer.fromJson(types, new TypeReference<List<String>>() {} );
        indexer.types = parsedTypes;

        logger.info(String.format("Exclude? %s, Types: %s", indexer.exclude, indexer.types));

        return indexer;
    }

    /**
     * Looks up the organism named on the command line.
     *
     * @return the organism, or {@code null} when no organism name was supplied
     * @throws IllegalArgumentException if a name was supplied but the lookup returned nothing;
     *         without this check a mistyped name would silently behave as "no organism given"
     */
    private Organism resolveOrganism() {
        if (organismCommonName == null) {
            return null;
        }
        Organism found = organismMapper.getByCommonName(organismCommonName);
        if (found == null) {
            throw new IllegalArgumentException("No organism found with common name: " + organismCommonName);
        }
        return found;
    }

    /** Indexes one organism (or its contents), or every organism when none was selected. */
    private void indexOrganisms(SQLIndexer indexer, Organism o) throws CrawlException, IOException, ParseException {
        if (o == null) {
            // only generate the organisms...
            if (features) {
                throw new RuntimeException("Will not index the contents of all the organisms at once.");
            }
            indexer.indexOrganisms();
        } else if (features) {
            indexer.indexOrganismContents(o);
        } else {
            indexer.indexOrganism(o);
        }
    }

    /**
     * Loads the chado connection properties, builds the MyBatis session factory from the
     * {@code ibatis-datasourced.xml} classpath resource and opens a session with a cleared cache.
     *
     * @throws IOException if the properties file or the MyBatis configuration cannot be read
     */
    protected void setupSession() throws IOException {

        chadoProperties = new Properties();

        // Properties.load leaves the stream open, so it has to be closed here.
        FileInputStream propertiesStream = new FileInputStream(chadoPropertiesFile);
        try {
            chadoProperties.load(propertiesStream);
        } finally {
            propertiesStream.close();
        }

        Reader reader = Resources.getResourceAsReader(resource);
        sqlMapper = new SqlSessionFactoryBuilder().build(reader, chadoProperties);

        session = sqlMapper.openSession();
        session.clearCache();
    }

    /**
     * Closes the SQL session, if one was opened, and then shuts down all Hazelcast instances.
     * Does nothing when no session exists.
     */
    protected void closeSession() {
        if (session != null) {
            session.close();
            Hazelcast.shutdownAll();
        }
    }

    /**
     * Converts the {@code --since} argument into a date.
     *
     * @param since a date formatted as {@code yyyy-MM-dd}, or {@code null}
     * @return the parsed date, or the current time when {@code since} is {@code null}
     * @throws ParseException if {@code since} is not a valid calendar date in that format
     */
    Date getDate(String since) throws ParseException {
        if (since == null) {
            return Calendar.getInstance().getTime();
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(SINCE_DATE_PATTERN);
        // Strict parsing: reject impossible dates such as 2011-13-45 instead of rolling them over.
        dateFormat.setLenient(false);
        return dateFormat.parse(since);
    }

    /**
     * Command-line entry point; hands the arguments to the inherited {@code prerun}.
     *
     * @param args the command-line arguments
     * @throws Exception whatever {@code prerun} propagates
     */
    public static void main(String[] args) throws Exception {
        new IncrementalSQLIndexBuilder().prerun(args);
    }
}