package org.genedb.web.mvc.controller.download; import java.util.Collection; import java.util.Map; import java.util.Set; import java.util.HashSet; import org.apache.log4j.Logger; import org.genedb.db.dao.SequenceDao; import org.genedb.querying.tmpquery.GeneDetail; import org.genedb.web.mvc.model.DTOFactory; import org.genedb.web.mvc.model.FeatureDTO; import org.gmod.schema.mapped.Feature; import org.gmod.schema.mapped.FeatureCvTerm; import org.gmod.schema.mapped.FeatureLoc; import org.gmod.schema.mapped.Synonym; import org.gmod.schema.feature.Transcript; import org.gmod.schema.feature.AbstractGene; import org.gmod.schema.feature.Polypeptide; import org.springframework.util.StringUtils; public class GeneDetailFieldValueExctractor { private Logger logger = Logger.getLogger(GeneDetailFieldValueExctractor.class); private String fieldInternalSeparator; private String blankField; private GeneDetail entry; //private BerkeleyMapFactory bmf; private SequenceDao sequenceDao; //private FeatureDTOAdaptor adaptor; private FeatureDTO dto; private int featureId; private String systematicId; private Feature feature; private Map<String, Feature> features; public GeneDetailFieldValueExctractor(GeneDetail entry, SequenceDao sequenceDao, Map<String,Feature>features, String fieldInternalSeparator, String blankField) { this.entry = entry; this.sequenceDao = sequenceDao; this.features = features; this.fieldInternalSeparator = fieldInternalSeparator; this.blankField = blankField; featureId = entry.getFeatureId(); systematicId = entry.getSystematicId(); // logger.error(systematicId); } public String getFieldValue (OutputOption outputOption) { // try first to fetch from lucene because it's faster String source = "lucene"; String fieldValue = getFieldValue(entry, outputOption); /* * NOTE this is commented out because the download processes run on the farm, and we don't want to be starting up cluster nodes everywhere... * must think if this is necessary anyway. * */ // if (fieldValue == null) { // source = "dto"; // fieldValue = getFieldValue(getAdaptor(), outputOption); // } if (fieldValue == null) { source = "hibernate"; fieldValue = getFieldValue(getFeature(), outputOption); } fieldValue = (fieldValue == null || fieldValue.equals("")) ? blankField : fieldValue; logger.debug(String.format("%s.%s (<- %s) = %s", systematicId, outputOption.name(), source, fieldValue)); return fieldValue; } public static boolean availableFromLucene(OutputOption outputOption) { boolean available = false; switch (outputOption) { case ORGANISM: available = true; break; case SYS_ID: available = true; break; case PRIMARY_NAME: available = true; break; case PRODUCT: available = true; break; case GENE_TYPE: available = true; break; case SYNONYMS: available = true; break; case PREV_SYS_ID: break; case CHROMOSOME: available = true; break; case LOCATION: available = true; break; case EC_NUMBERS: break; case GO_TERMS: break; case NUM_TM_DOMAINS: break; case SIG_P: break; case GPI_ANCHOR: break; case MOL_WEIGHT: break; case ISOELECTRIC_POINT: break; case GO_IDS: break; case PFAM_IDS: break; case INTERPRO_IDS: break; } return available; } private String getFieldValue(GeneDetail entry, OutputOption outputOption) { String fieldValue = null; switch (outputOption) { case ORGANISM: fieldValue = entry.getTaxonDisplayName(); break; case SYS_ID: fieldValue = entry.getSystematicId(); break; case PRIMARY_NAME: fieldValue = entry.getPrimaryName(); break; case PRODUCT: fieldValue = entry.getProduct(); break; case GENE_TYPE: fieldValue = entry.getType(); break; case SYNONYMS: fieldValue = StringUtils.collectionToDelimitedString(entry.getSynonyms(), fieldInternalSeparator); break; case PREV_SYS_ID: break; case CHROMOSOME: fieldValue = entry.getTopLevelFeatureName(); break; case LOCATION: fieldValue = entry.getLocation(); break; case EC_NUMBERS: break; case GO_TERMS: break; case NUM_TM_DOMAINS: break; case SIG_P: break; case GPI_ANCHOR: break; case MOL_WEIGHT: break; case ISOELECTRIC_POINT: break; case GO_IDS: break; case PFAM_IDS: break; case INTERPRO_IDS: break; } return fieldValue; } // /** // * Gets field values for transcripts. Because this gets called several times for each transcript, // * and in each case an adaptor is needed, this method takes an adaptor parameter rather than // * the transcript itself, so as to be able to reuse the same adaptor instance. // * @param adaptor // * @param outputOption // * @return // */ // private String getFieldValue(FeatureDTOAdaptor adaptor, OutputOption outputOption) { // String fieldValue = null; // // if (adaptor != null) { // // switch (outputOption) { // case CHROMOSOME: // fieldValue = adaptor.getContig(); // break; // case EC_NUMBERS: // fieldValue = adaptor.getEc(); // break; // case GENE_TYPE: // fieldValue = adaptor.getType(); // break; // case GO_IDS: // fieldValue = adaptor.getGO(); // break; // case GPI_ANCHOR: // fieldValue = adaptor.getGpiAnchor(); // break; // case INTERPRO_IDS: // fieldValue = adaptor.getInterpro(); // break; // case ISOELECTRIC_POINT: // fieldValue = adaptor.getIsoelectricPoint(); // break; // case LOCATION: // fieldValue = adaptor.getLocation(); // break; // case MOL_WEIGHT: // fieldValue = adaptor.getMolWeight(); // break; // case NUM_TM_DOMAINS: // fieldValue = adaptor.getNumTM(); // break; // case ORGANISM: // fieldValue = adaptor.getOrganism(); // break; // case PFAM_IDS: // fieldValue = adaptor.getPfam(); // break; // case PREV_SYS_ID: // fieldValue = adaptor.getPrevIds(); // break; // case PRIMARY_NAME: // fieldValue = adaptor.getPrimaryName(); // break; // case PRODUCT: // fieldValue = adaptor.getProduct(); // break; // case SIG_P: // fieldValue = adaptor.isSigP(); // break; // case SYNONYMS: // fieldValue = adaptor.getSynonyms(); // break; // case SYS_ID: // fieldValue = adaptor.getId(); // break; // } // // } // // return fieldValue; // } private Set<String> getGosForFeature(Feature feature) { Set<String> gos = new HashSet<String>(); gos.addAll(populateFromFeatureCvTerms(feature, "biological_process")); gos.addAll(populateFromFeatureCvTerms(feature, "molecular_function")); gos.addAll(populateFromFeatureCvTerms(feature, "cellular_component")); return gos; } private Collection<String> populateFromFeatureCvTerms(Feature feature, String cvNamePrefix) { Set<String> ret = new HashSet<String>(); for (FeatureCvTerm fct : feature.getFeatureCvTermsFilteredByCvNameStartsWith(cvNamePrefix)) { ret.add(fct.getCvTerm().getName()/* , fct.getCvTerm().getDbXRef().getAccession())*/); } return ret; } private Collection<String> getGosForTranscript(Transcript tr) { return getGosForFeature((Feature) (tr.getPolypeptide())); } private Collection<String> getGosForGene(AbstractGene gene) { Set<String> gos = new HashSet<String>(); for (Transcript tr : gene.getTranscripts()) { gos.addAll(getGosForTranscript(tr)); } return gos; } /** * Gets the field value for any Feature. Used when the feature in question is not a transcript. * @param feature * @param outputOption * @return */ private String getFieldValue(Feature feature, OutputOption outputOption) { String fieldValue = null; if (feature != null) { switch (outputOption) { case CHROMOSOME: FeatureLoc top = feature.getRankZeroFeatureLoc(); Feature topLevelFeature = top.getSourceFeature(); fieldValue = topLevelFeature.getDisplayName(); break; case GENE_TYPE: fieldValue = feature.getType().getName(); break; case LOCATION: FeatureLoc top2 = feature.getRankZeroFeatureLoc(); fieldValue = top2.getFmin() + " - " + top2.getFmax() + ( (top2.getStrand() < 0 ) ? " (reverse strand)" : "" ); break; case ORGANISM: fieldValue = feature.getOrganism().getCommonName(); break; case PRIMARY_NAME: fieldValue = feature.getUniqueName(); break; case GO_TERMS: Collection<String> gos = null; /* For genes and transcripts, collect their product GOs. */ if (feature.getType().getName().equals("gene")) { gos = getGosForGene((AbstractGene) feature); } else if (feature.getType().getName().equals("mRNA")) { gos = getGosForTranscript((Transcript) feature); } else gos = getGosForFeature(feature); if (gos != null) { fieldValue = StringUtils.collectionToDelimitedString(gos, fieldInternalSeparator); } break; case SYNONYMS: Collection<Synonym> synonyms = feature.getSynonyms(); for (Synonym synonym : synonyms) { fieldValue += synonym.getName() + fieldInternalSeparator; } break; case SYS_ID: fieldValue = feature.getUniqueName(); break; } } return fieldValue; } // private FeatureDTOAdaptor getAdaptor() { // // if (dto == null) { // dto = dtoFactory.getDtoByName(feature); // } // // // if the dto is still null, then trying to make generate adaptor will raise an exception, // if (dto == null) { // return null; // } // // //logger.debug(this.systematicId + " -- " + dto); // // if (adaptor == null) { // adaptor = new FeatureDTOAdaptor(dto, fieldInternalSeparator); // } // // return adaptor; // } public Feature getFeature() { if (features != null) { if (features.containsKey(systematicId)) { return features.get(systematicId); } } if (feature == null) { feature = sequenceDao.getFeatureByUniqueName(systematicId, Feature.class); } return feature; } }