package org.genedb.crawl.elasticsearch.index.gff;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;

import org.apache.log4j.Logger;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.exists.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.admin.indices.flush.FlushResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.client.IndicesAdminClient;
import org.genedb.crawl.elasticsearch.index.NonDatabaseDataSourceIndexBuilder;
import org.genedb.crawl.model.Organism;
import org.kohsuke.args4j.Option;

/**
 * Command-line index builder that makes sure the configured index exists,
 * applies the feature mapping to it and, when a GFF path is supplied,
 * loads that path for the given organism before flushing the index.
 */
public class GFFIndexBuilder extends NonDatabaseDataSourceIndexBuilder {

    static Logger logger = Logger.getLogger(GFFIndexBuilder.class);

    /** Path to the GFF folder (option {@code -g} / {@code --gffs}); optional. */
    @Option(name = "-g", aliases = {"--gffs"}, usage = "The path to the GFF folder", required = false)
    public String gffs;

    /**
     * The organism, expressed as JSON (option {@code -o} / {@code --organism}).
     * Optional on the command line, but {@link #run()} rejects a GFF load without it.
     */
    @Option(name = "-o", aliases = { "--organism" }, usage = "The organism, expressed as a JSON.", required = false)
    public String organism;

    /** Classpath location of the JSON mapping definition applied to the feature type. */
    public final static String featureMapping = "org/genedb/crawl/model/Feature.json";

    /**
     * Creates the index if it does not exist yet, puts the feature mapping,
     * optionally converts the GFF path, and finally flushes the index.
     *
     * @throws IOException if the mapping resource cannot be found or read
     * @throws RuntimeException if index creation or the put-mapping request is
     *         not acknowledged, or if {@code gffs} is set without {@code organism}
     */
    public void run() throws IOException, ParseException, SecurityException, IllegalArgumentException,
            NoSuchFieldException, IllegalAccessException {

        init();

        IndicesAdminClient indexClient = connection.getClient().admin().indices();

        // the put mapping below needs an existing index
        IndicesExistsResponse exists = indexClient
                .exists(new IndicesExistsRequest(connection.getIndex()))
                .actionGet();

        if (!exists.exists()) {
            CreateIndexResponse created = indexClient
                    .create(new CreateIndexRequest(connection.getIndex()))
                    .actionGet();
            logger.info("Index set? " + created.acknowledged());
            if (!created.acknowledged()) {
                throw new RuntimeException("Could not create index!");
            }
        }

        // apply the put mapping
        String featureMappingJson = getMapping(featureMapping);

        PutMappingResponse mapped = indexClient
                .preparePutMapping(connection.getIndex())
                .setSource(featureMappingJson)
                .setType(connection.getFeatureType())
                .execute()
                .actionGet();

        if (!mapped.acknowledged()) {
            throw new RuntimeException("Could not create mapping!");
        }

        if (gffs != null) {
            if (organism == null) {
                throw new RuntimeException(
                        "Please supply an organism if loading a gff because GFF files do not specify their organism");
            }
            // NOTE(review): both helpers are defined outside this file; presumably the first
            // resolves the JSON into an Organism and the second loads the GFF files - confirm.
            Organism o = getAndPossiblyStoreOrganism(organism);
            convertPath(gffs, o);
        }

        FlushResponse fr = indexClient.flush(new FlushRequest(this.connection.getIndex())).actionGet();
        logger.info(String.format("Flush! %s failed, %s successful, %s total",
                fr.getFailedShards(), fr.getSuccessfulShards(), fr.getTotalShards()));

        logger.debug("Complete");
    }

    /**
     * Returns the contents of a classpath resource as a single string (to be used for mapping).
     * Lines are concatenated without any separator, so line breaks in the resource are dropped.
     *
     * @param mapping classpath-relative name of the resource to read
     * @return the resource text with its line terminators removed
     * @throws IOException if the resource does not exist or cannot be read
     */
    private String getMapping(String mapping) throws IOException {
        // fetch the feature mapping definition
        InputStream is = this.getClass().getClassLoader().getResourceAsStream(mapping);
        if (is == null) {
            // getResourceAsStream signals a missing resource with null; fail with a clear message
            throw new IOException("Mapping resource not found on classpath: " + mapping);
        }

        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        BufferedReader buf = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = buf.readLine()) != null) {
                sb.append(line);
            }
            return sb.toString();
        } finally {
            // closing the reader also closes the underlying resource stream
            buf.close();
        }
    }

    /**
     * Entry point: passes the command-line arguments to {@code prerun} and calls
     * {@code closeIndex} on the object it returns.
     *
     * @param args command-line arguments
     * @throws Exception if anything goes wrong during the run
     */
    public static void main(String[] args) throws Exception {
        new GFFIndexBuilder().prerun(args).closeIndex();
    }
}