package org.genedb.crawl.elasticsearch.index.das;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.xml.bind.JAXBException;
import org.apache.log4j.Logger;
import org.genedb.crawl.elasticsearch.index.NonDatabaseDataSourceIndexBuilder;
import org.genedb.crawl.model.Coordinates;
import org.genedb.crawl.model.Cvterm;
import org.genedb.crawl.model.Feature;
import org.genedb.crawl.model.LocatedFeature;
import org.genedb.crawl.model.Organism;
import org.kohsuke.args4j.Option;
import uk.ac.ebi.das.jdas.adapters.features.FeatureAdapter;
import uk.ac.ebi.das.jdas.exceptions.ValidationException;
import uk.ac.ebi.das.jdas.schema.entryPoints.SEGMENT;
import org.genedb.crawl.model.Property;
import org.genedb.crawl.modelling.RegionFeatureBuilder;
public class DASIndexBuilder extends NonDatabaseDataSourceIndexBuilder {
private static Logger logger = Logger.getLogger(DASIndexBuilder.class);
@Option(name = "-u", aliases = { "--url" }, usage = "A url to a DAS registry.", required = true)
public URL url;
@Option(name = "-s", aliases = { "--source" }, usage = "The name of the DAS source.", required = true)
public String source;
@Option(name = "-r", aliases = { "--region" }, usage = "If you only want to index one region, specify its id here.", required = false)
public String region;
@Option(name = "-c", aliases = { "--create_regions" }, usage = "If true, an attempt will be made to create/update regions from the DAS source.", required = false)
public boolean createRegions = false;
@Option(name = "-i", aliases = { "--interbase" }, usage = "If true, assumes the DAS source coordinates are interbase (default is true). If false, it will subtract 1 from the start position.", required = false)
public boolean interbase = true;
@Option(name = "-o", aliases = { "--organism" }, usage = "The organism, expressed as a JSON.", required = false)
public String organism;
public void run() throws IOException, JAXBException, SecurityException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException, ValidationException {
init();
Organism o = getAndPossiblyStoreOrganism(organism);
DasFetcher fetcher = new DasFetcher(url, source);
for (SEGMENT segment : fetcher.getEntryPoints()) {
logger.info(segment);
if (region != null) {
if (!region.equals(segment.getId())) {
continue;
}
}
if (createRegions) {
String sequence = fetcher.getSequence(segment, segment.getStart(), segment.getStop());
logger.debug(sequence);
RegionFeatureBuilder rfb = new RegionFeatureBuilder(segment.getId(), o.ID);
// @FIXME some other way will need to be made for fetching DAS sequences as we are
// not storing them in the ES any more...
// rfb.addSequence(sequence);
Feature region = rfb.getRegion();
regionsMapper.createOrUpdate(region);
logger.debug(String.format("Indexing region : %s (%d)", region.uniqueName, region.residues.length()));
}
List<FeatureAdapter> features = fetcher.getFeatures(segment, segment.getStart(), segment.getStop());
this.indexFeatures(o, segment, features);
}
}
protected void indexFeatures(Organism o, SEGMENT segment, List<FeatureAdapter> features) throws ValidationException, IOException {
for (FeatureAdapter featureAdapter : features) {
LocatedFeature feature = new LocatedFeature();
// if the DAS source is not interbase, then must subtract one from its fmin
int fmin = interbase ? featureAdapter.getStart() : featureAdapter.getStart() - 1;
int fmax = featureAdapter.getEnd();
feature.uniqueName = featureAdapter.getId();
feature.fmin = fmin;
feature.fmax = fmax;
feature.organism_id = o.ID;
feature.region = segment.getId();
feature.type = new Cvterm();
feature.type.name = featureAdapter.getType().getId();
Coordinates coordinates = new Coordinates();
feature.coordinates = new ArrayList<Coordinates>();
feature.coordinates.add(coordinates);
coordinates.region = feature.region;
coordinates.fmin = fmin;
coordinates.fmax = fmax;
feature.properties = new ArrayList<Property>();
Property prop = new Property();
prop.name = "comment";
prop.value = String.format("Pulled in from %s/%s/%s", url, source, segment.getId());
// prop.type = new Cvterm();
// prop.type.name = "comment";
// prop.type.cv = new Cv();
// prop.type.cv.name = "cvterm_property_type";
feature.properties.add(prop);
featureMapper.createOrUpdate(feature);
logger.debug(String.format("Indexing feature: %s %s %s %s", featureAdapter.getId(), featureAdapter.getType().getId(), featureAdapter.getStart(), featureAdapter.getEnd()));
}
}
public static void main(String[] args) throws Exception {
new DASIndexBuilder().prerun(args).closeIndex();
}
}