package org.genedb.crawl.elasticsearch.mappers; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.log4j.Logger; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.action.index.IndexRequestBuilder; import org.elasticsearch.client.action.search.SearchRequestBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.FieldQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.genedb.crawl.mappers.FeatureMapper; import org.genedb.crawl.model.Coordinates; import org.genedb.crawl.model.Cvterm; import org.genedb.crawl.model.Dbxref; import org.genedb.crawl.model.Exon; import org.genedb.crawl.model.Feature; import org.genedb.crawl.model.FeatureRelationship; import org.genedb.crawl.model.HierarchyRelation; import org.genedb.crawl.model.Orthologue; import org.genedb.crawl.model.Property; import org.genedb.crawl.model.Gene; import org.genedb.crawl.model.LocatedFeature; import org.genedb.crawl.model.Pub; import org.genedb.crawl.model.Synonym; import org.genedb.crawl.model.Transcript; import org.springframework.stereotype.Component; @Component public class ElasticSearchFeatureMapper extends ElasticSearchBaseMapper implements FeatureMapper { private Logger logger = Logger.getLogger(ElasticSearchFeatureMapper.class); @Override public Feature get(String uniqueName, String name, Integer organism_id, String type) { BoolQueryBuilder booleanQuery = QueryBuilders.boolQuery(); booleanQuery.must(QueryBuilders.fieldQuery("uniqueName",escape( uniqueName))); if (organism_id != null) { booleanQuery.must(QueryBuilders.fieldQuery("organism_id", organism_id)); } if (name != null) { booleanQuery.must(QueryBuilders.fieldQuery("name",escape( name))); } if (type != null) { booleanQuery.must(QueryBuilders.fieldQuery("type.name", escape(type))); } List<LocatedFeature> features = (List<LocatedFeature>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, LocatedFeature.class); return features.get(0); } /* XXX TODO: synonym search not implemented for now, no idea how this is done in Elasticsearch */ @Override public Feature getWithSynonym(String uniqueName, String name, Integer organism_id, String type) { BoolQueryBuilder booleanQuery = QueryBuilders.boolQuery(); booleanQuery.must(QueryBuilders.fieldQuery("uniqueName",escape( uniqueName))); if (organism_id != null) { booleanQuery.must(QueryBuilders.fieldQuery("organism_id", organism_id)); } if (name != null) { booleanQuery.must(QueryBuilders.fieldQuery("name",escape( name))); } if (type != null) { booleanQuery.must(QueryBuilders.fieldQuery("type.name", escape(type))); } List<LocatedFeature> features = (List<LocatedFeature>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, LocatedFeature.class); return features.get(0); } public LocatedFeature get(String uniqueName) { try { return (LocatedFeature) jsonIzer.fromJson (getFromElastic(connection.getIndex(), connection.getFeatureType(), uniqueName), LocatedFeature.class); } catch (Exception e) { logger.trace("Could not find " + uniqueName ); //e.printStackTrace(); } return null; } @Override public List<Property> properties(Feature feature) { return feature.properties; } @Override public List<Cvterm> terms(Feature feature) { return feature.terms; } @Override public List<Coordinates> coordinates(Feature feature) { return feature.coordinates; } @Override public List<Pub> pubs(Feature feature) { return feature.pubs; } // public void createOrUpdate(ElasticSequence sequence) { // // try { // String json = jsonIzer.toJson(sequence); // // logger.debug("Storing sequence: " + sequence.name); // // connection.getClient().prepareIndex("sequences", "Sequence", sequence.name) // .setSource(json) // .execute() // .actionGet(); // // } catch (Exception e) { // throw new RuntimeException(e); // } // // } public void createOrUpdate(Feature feature) { if (feature.coordinates != null && feature.coordinates.size() > 0) { if (! (feature instanceof LocatedFeature)) { LocatedFeature lFeature = new LocatedFeature(); for (Field field : Feature.class.getFields()) { try { field.set(lFeature, field.get(feature)); } catch (Exception e) { throw new RuntimeException(e); } } Coordinates c = feature.coordinates.get(0); lFeature.fmax = c.fmax; lFeature.fmin = c.fmin; lFeature.region = c.region; lFeature.phase = c.phase; lFeature.strand = c.strand; feature = lFeature; } } try { logger.debug("Storing: " + feature.uniqueName); IndexRequestBuilder builder = connection.getClient().prepareIndex( connection.getIndex(), connection.getFeatureType(), feature.uniqueName); String json = jsonIzer.toJson(feature); // logger.debug("Source:"); logger.debug(json); builder.setSource(json); // if (feature instanceof LocatedFeature) { // LocatedFeature lFeature = (LocatedFeature) feature; // if (lFeature.parent != null) { // logger.debug(String.format("Setting %s as parent of %s!", lFeature.parent, feature.uniqueName)); // builder.setParent(lFeature.parent); // } // } //logger.debug(connection.getClient().prepareGet(index, type, feature.uniqueName).execute().actionGet().sourceAsString()); builder.execute().actionGet(); } catch (Exception e) { throw new RuntimeException(e); } } @Override public void delete(Feature feature) { logger.debug("Deleting " + feature.uniqueName); DeleteResponse response = connection .getClient() .prepareDelete() .setIndex(connection.getIndex()) .setType(connection.getFeatureType()) .setId(feature.uniqueName) .execute() .actionGet(); if (response.isNotFound()) { logger.warn(feature.uniqueName + " not found"); } } @Override public List<Transcript> transcripts(Gene gene, boolean exons) { List<Transcript> transcripts = new ArrayList<Transcript>(); FieldQueryBuilder parentQuery = QueryBuilders.fieldQuery("parent", gene.uniqueName); FieldQueryBuilder relationshipQuery = QueryBuilders.fieldQuery("parentRelationshipType", "part_of"); FieldQueryBuilder typeQuery = QueryBuilders.fieldQuery("type.name", "mRNA"); BoolQueryBuilder transcriptQuery = QueryBuilders.boolQuery() .must(parentQuery) .must(relationshipQuery) .must(typeQuery); SearchRequestBuilder builder = connection .getClient() .prepareSearch(connection.getIndex()) .setTypes(connection.getFeatureType()); SearchResponse response = builder .setQuery(transcriptQuery) .setExplain(true) .execute() .actionGet(); for (SearchHit hit : response.getHits()) { String source = hit.sourceAsString(); //logger.debug(source); try { Transcript t = (Transcript) jsonIzer.fromJson(source, Transcript.class); // logger.info("adding transctipt " + t.uniqueName + " " + gene.uniqueName); transcripts.add(t); if (exons) { t.exons = exons(t); } } catch (Exception e) { logger.warn(e.getMessage()); continue; } } return transcripts; } public List<Exon> exons(Transcript transcript) { List<Exon> exons = new ArrayList<Exon>(); FieldQueryBuilder parentQuery = QueryBuilders.fieldQuery("parent", transcript.uniqueName); FieldQueryBuilder relationshipQuery = QueryBuilders.fieldQuery("parentRelationshipType", "part_of"); FieldQueryBuilder typeQuery = QueryBuilders.fieldQuery("type.name", "exon"); BoolQueryBuilder exonQuery = QueryBuilders.boolQuery() .must(parentQuery) .must(relationshipQuery) .must(typeQuery); SearchRequestBuilder builder = connection .getClient() .prepareSearch(connection.getIndex()) .setTypes(connection.getFeatureType()); SearchResponse response = builder .setQuery(exonQuery) .setExplain(true) .execute() .actionGet(); for (SearchHit hit : response.getHits()) { String source = hit.sourceAsString(); try { Exon e = (Exon) jsonIzer.fromJson(source, Exon.class); //logger.info("adding exon " + e.uniqueName + " to " + transcript.uniqueName); exons.add(e); } catch (Exception e) { logger.warn(e.getMessage()); continue; } } return exons; } // @Override // public LocatedFeature getOfType(String uniqueName, Integer organism_id, // String name, String type) { // // BoolQueryBuilder booleanQuery = QueryBuilders.boolQuery(); // // booleanQuery.must(QueryBuilders.fieldQuery("uniqueName", escape(uniqueName))); // // if (type != null) { // booleanQuery.must(QueryBuilders.fieldQuery("type.name", escape(type))); // } // // if (organism_id != null) { // booleanQuery.must(QueryBuilders.fieldQuery("organism_id", organism_id)); // } // // if (name != null) { // booleanQuery.must(QueryBuilders.fieldQuery("name", escape(name))); // } // // List features = null; // // if (type != null) { // if (type.equals("gene")) { // features = (List<Gene>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, Gene.class); // } else if (type.equals("mRNA")) { // features = (List<Transcript>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, Transcript.class); // } else if (type.equals("exon")) { // features = (List<Exon>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, Exon.class); // } else { // features = (List<LocatedFeature>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, LocatedFeature.class); // } // } else { // features = (List<LocatedFeature>) getAllMatches(connection.getIndex(), connection.getFeatureType(), booleanQuery, LocatedFeature.class); // } // // // // return (LocatedFeature) features.get(0); // } @Override public List<Synonym> synonyms(Feature feature) { //String resultFeatureJson = this.getFromElastic(connection.getIndex(), connection.getFeatureType(), feature.uniqueName, new String[] {"synonyms"} ); //Feature resultFeature = this.getFeatureFromJson(resultFeatureJson); return feature.synonyms; } private Set<String> ofType(List<Cvterm> ofType) { Set<String> types = new HashSet<String>(); if (ofType != null) { for (Cvterm type : ofType) { types.add(type.name); } } return types; } // TODO - untested @Override public List<Feature> parents(Feature feature,List<Cvterm> relationships) { Set<String> types = this.ofType(relationships); List<Feature> parents = new ArrayList<Feature>(); try { LocatedFeature f = (LocatedFeature) jsonIzer.fromJson(this.getFromElastic(connection.getIndex(), connection.getFeatureType(), feature.uniqueName), LocatedFeature.class); if (f.parent == null || f.parentRelationshipType == null) { return parents; } if (types.size() > 0 && (! types.contains(f.parentRelationshipType))) return parents; LocatedFeature p = (LocatedFeature) jsonIzer.fromJson(this.getFromElastic(connection.getIndex(), connection.getFeatureType(), f.parent), LocatedFeature.class); if (p != null) { // FeatureRelationship r = new FeatureRelationship(); // r.object = p; // r.type = new Cvterm(f.parentRelationshipType); // parents.add(r); // parents are objects p.relationshipType = new Cvterm(f.parentRelationshipType); parents.add(p); } } catch (Exception e) { e.printStackTrace(); } return parents; } // TODO - untested @Override public List<Feature> children(Feature feature,List<Cvterm> relationships) { Set<String> types = this.ofType(relationships); List<Feature> children = new ArrayList<Feature>(); try { //String escaped = LUCENE_PATTERN.matcher(feature.uniqueName).replaceAll(REPLACEMENT_STRING); // Using a standard term query was retrieving matches that had the same prefix // SearchRequestBuilder srb = connection.getClient().prepareSearch(index).setQuery (QueryBuilders.fieldQuery("parent", escaped)); // this is the closest I think I can get to an exact match query... // by encapsulating the query in quotes, and making sure the phrase slop is 0 String queryString = String.format("parent:\"%s\"", this.escape(feature.uniqueName)); logger.debug(queryString); SearchResponse response = connection.getClient() .prepareSearch(connection.getIndex()) .setTypes(connection.getFeatureType()) .setQuery (QueryBuilders.queryString(queryString).phraseSlop(0)) .execute() .actionGet(); for (SearchHit hit : response.getHits()) { try { LocatedFeature child = (LocatedFeature) jsonIzer.fromJson(hit.sourceAsString(), LocatedFeature.class); logger.info(" - " + child.uniqueName + " parent: " + child.parent); // make sure we only exact matches if (! child.parent.equals(feature.uniqueName)) { logger.warn(" SKIPPING"); continue; } if (types.size() > 0 && (! types.contains(child.parentRelationshipType)) ) continue; // FeatureRelationship r = new FeatureRelationship(); // r.subject = child; // r.type = new Cvterm(child.parentRelationshipType); // children are subjects child.relationshipType = new Cvterm(child.parentRelationshipType); children.add(child); } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e) { logger.error(e); } return children; } @Override public List<LocatedFeature> domains(Feature feature) { // TODO Auto-generated method stub return null; } @Override public List<Dbxref> dbxrefs(Feature feature) { // TODO Auto-generated method stub return null; } @Override public List<Orthologue> orthologues(Feature feature) { // TODO Auto-generated method stub return null; } // public static String getIndex() { // return "features"; // } // // // public static String getType() { // return "Feature"; // } }