package org.genedb.crawl.dao.backend;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.PostConstruct;
import org.apache.log4j.Logger;
import org.genedb.crawl.CrawlException;
import org.genedb.crawl.mappers.FeaturesMapper;
import org.genedb.crawl.mappers.OrganismsMapper;
import org.genedb.crawl.mappers.RegionsMapper;
import org.genedb.crawl.mappers.TermsMapper;
import org.genedb.crawl.model.Cvterm;
import org.genedb.crawl.model.Feature;
import org.genedb.crawl.model.LocatedFeature;
import org.genedb.crawl.model.LocationBoundaries;
import org.genedb.crawl.model.Organism;
import org.genedb.crawl.model.Sequence;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Component
public class RegionsDAO extends BaseDAO implements org.genedb.crawl.dao.RegionsDAO {

    private static final Logger logger = Logger.getLogger(RegionsDAO.class);

    @Autowired
    RegionsMapper regionsMapper;

    // NOTE(review): termsMapper and featuresMapper are not used by any active code
    // path in this class; kept because field injection may be relied on elsewhere.
    @Autowired
    TermsMapper termsMapper;

    @Autowired
    FeaturesMapper featuresMapper;

    @Autowired
    OrganismsMapper organismsMapper;

    private boolean cacheRegionsOnStartup = false;

    // Cache of the COMPLETE (unpaged, unfiltered) region list per organism, keyed
    // by the organism ID rendered as a String. Warmed by setup() when
    // cacheRegionsOnStartup is set, or lazily by inorganism().
    private final Map<String, List<Feature>> organismRegionMap = new HashMap<String, List<Feature>>();

    public void setCacheRegionsOnStartup(boolean cacheRegionsOnStartup) {
        this.cacheRegionsOnStartup = cacheRegionsOnStartup;
    }

    /**
     * Optionally warms the region cache for every organism at startup.
     *
     * @throws CrawlException if listing organisms or their regions fails
     */
    @PostConstruct
    void setup() throws CrawlException {
        if (!cacheRegionsOnStartup) {
            return;
        }
        for (Organism o : organismsMapper.list()) {
            // Fetch the full, unfiltered region list so cached entries are complete.
            List<Feature> r = regionsMapper.inorganism(o.ID, null, null, null);
            Collections.sort(r, new FeatureUniqueNameSorter());
            organismRegionMap.put(String.valueOf(o.ID), r);
            logger.info(String.format("Cached %s.", o.common_name));
        }
    }

    /**
     * Returns features and their locations on a region of interest. The requested
     * interval is first widened (via a boundary query on gene/pseudogene features)
     * so that any gene overlapping the requested range is returned in full.
     *
     * @param region  unique name of the region (e.g. a chromosome)
     * @param start   interval start; defaults to 0 when null
     * @param end     interval end; defaults to the region's sequence length when null
     * @param exclude whether {@code types} is an exclusion list (true, the default)
     *                or an inclusion list
     * @param types   feature types to exclude/include; defaults to gene and pseudogene
     * @throws CrawlException on lookup failure
     */
    @Override
    public List<LocatedFeature> locations(
            String region,
            Integer start,
            Integer end,
            Boolean exclude,
            List<String> types
    ) throws CrawlException {
        // the JAX-WS endpoint won't think to use default value, so we must assign them manually
        if (exclude == null) {
            exclude = true;
        }
        if (start == null) {
            start = 0;
        }
        if (end == null) {
            // NOTE(review): assumes regionsMapper.sequence() never returns null (and
            // always has residues) for a valid region name — confirm, else this NPEs.
            end = regionsMapper.sequence(region).dna.length();
        }

        // Determine the gene-like types for the boundary query in advance.
        Set<String> geneTypes = new HashSet<String>();
        if (types != null) {
            geneTypes.addAll(types);
        } else {
            geneTypes.addAll(Arrays.asList("gene", "pseudogene"));
        }

        // Boundary calculations must include genes or pseudogenes, so we clone the set.
        Set<String> boundaryTypes = new HashSet<String>(geneTypes);
        if (exclude) {
            // If excluding types, make sure genes and pseudogenes are not on that
            // list for boundary calculations.
            boundaryTypes.remove("gene");
            boundaryTypes.remove("pseudogene");
        } else {
            // Conversely, if including types, boundary calculations need genes and
            // pseudogenes whether or not the requester specified them.
            boundaryTypes.add("gene");
            boundaryTypes.add("pseudogene");
        }

        logger.info(String.format("%s %d-%d %s", region, start, end, exclude));
        logger.info("Gene Types " + geneTypes);

        int actualStart = start;
        int actualEnd = end;

        // Widen the interval so features straddling the requested bounds come back whole.
        LocationBoundaries expandedBoundaries = regionsMapper.locationsMinAndMaxBoundaries(
                region, start, end, exclude, new ArrayList<String>(boundaryTypes));
        if (expandedBoundaries != null) {
            if (expandedBoundaries.start != null && expandedBoundaries.start < start) {
                actualStart = expandedBoundaries.start;
            }
            if (expandedBoundaries.end != null && expandedBoundaries.end > end) {
                actualEnd = expandedBoundaries.end;
            }
        }

        logger.debug(String.format("Locating on %s : %s-%s (%s)", region, actualStart, actualEnd, exclude));
        return regionsMapper.locations(region, actualStart, actualEnd, exclude, new ArrayList<String>(geneTypes));
    }

    /**
     * Returns the length metadata of a region's sequence (no residues), wrapped in
     * a single-element list.
     */
    @Override
    public List<Sequence> sequenceLength(String region) {
        List<Sequence> sequences = new ArrayList<Sequence>();
        sequences.add(regionsMapper.sequenceLength(region));
        return sequences;
    }

    /**
     * Returns the sequence of a region, wrapped in a single-element list. With no
     * start/end the full sequence is returned (coordinates set to 1..length);
     * otherwise the mapper trims to the requested window.
     */
    @Override
    public List<Sequence> sequence(
            String region,
            Integer start,
            Integer end) {
        List<Sequence> sequences = new ArrayList<Sequence>();
        if (start == null && end == null) {
            Sequence sequence = regionsMapper.sequence(region);
            sequence.start = 1;
            sequence.end = sequence.length;
            sequences.add(sequence);
        } else {
            // NOTE(review): a one-sided request (only start or only end null) is
            // passed straight through — confirm the mapper tolerates a null bound.
            sequences.add(regionsMapper.sequenceTrimmed(region, start, end));
        }
        return sequences;
    }

    /**
     * Returns basic information about a region looked up by unique name or name,
     * optionally restricted to an organism (resolved to its ID when found).
     *
     * @throws CrawlException if the organism lookup fails
     */
    @Override
    public Feature getInfo(
            String uniqueName,
            String name,
            String organism) throws CrawlException {
        Integer organism_id = null;
        if (organism != null) {
            Organism o = util.getOrganism(organism);
            if (o != null) {
                organism_id = o.ID;
            }
        }
        return regionsMapper.getInfo(uniqueName, name, organism_id);
    }

    /**
     * Lists the regions of an organism, sorted by unique name. Complete
     * (unpaged, unfiltered) listings are served from and stored in the cache.
     *
     * @param organism organism identifier accepted by util.getOrganism()
     * @param limit    optional page size
     * @param offset   optional page offset
     * @param type     optional region type filter
     * @throws CrawlException if the organism lookup fails
     */
    @Override
    public List<Feature> inorganism(
            String organism,
            Integer limit,
            Integer offset,
            String type) throws CrawlException {
        Organism o = util.getOrganism(organism);
        String cacheKey = String.valueOf(o.ID);
        // BUG FIX: the cache is keyed by String.valueOf(o.ID), but the old lookup
        // used the Integer o.ID directly, so containsKey() was always false and the
        // cache (including the one warmed by setup()) was never hit.
        // Also, only a complete listing may be served from or stored in the cache;
        // otherwise a paged/filtered subset could later be returned for an
        // unrelated query.
        boolean cacheable = limit == null && offset == null && type == null;
        if (cacheable && organismRegionMap.containsKey(cacheKey)) {
            return organismRegionMap.get(cacheKey);
        }
        List<Feature> r = regionsMapper.inorganism(o.ID, limit, offset, type);
        Collections.sort(r, new FeatureUniqueNameSorter());
        if (cacheable) {
            organismRegionMap.put(cacheKey, r);
        }
        return r;
    }

    /**
     * Lists the distinct region types present in an organism, each wrapped in a
     * Feature that carries only its type.
     *
     * @throws CrawlException if the organism lookup fails
     */
    @Override
    public List<Feature> typesInOrganism(String organism) throws CrawlException {
        Organism o = util.getOrganism(organism);
        List<Cvterm> regionTypes = regionsMapper.typesInOrganism(o.ID);
        List<Feature> regions = new ArrayList<Feature>();
        for (Cvterm regionType : regionTypes) {
            Feature region = new Feature();
            region.type = regionType;
            regions.add(region);
        }
        return regions;
    }

    /**
     * Orders features alphabetically by uniqueName. Static nested: it uses no
     * state from the enclosing instance.
     */
    static class FeatureUniqueNameSorter implements Comparator<Feature> {
        @Override
        public int compare(Feature f1, Feature f2) {
            return f1.uniqueName.compareTo(f2.uniqueName);
        }
    }
}