package org.nextprot.api.tasks.solr.indexer.entry.impl;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.nextprot.api.core.domain.DbXref;
import org.nextprot.api.core.domain.DbXref.DbXrefProperty;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.core.domain.Interactant;
import org.nextprot.api.core.domain.Interaction;
import org.nextprot.api.core.domain.Isoform;
import org.nextprot.api.core.domain.Overview;
import org.nextprot.api.core.domain.Publication;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationEvidence;
import org.nextprot.api.core.service.fluent.EntryConfig;
import org.nextprot.api.solr.index.EntryIndex.Fields;
import org.nextprot.api.tasks.solr.indexer.entry.EntryFieldBuilder;
import org.nextprot.api.tasks.solr.indexer.entry.FieldBuilder;
/**
 * Populates the cross-reference related index fields of an entry:
 * {@link Fields#XREFS}, {@link Fields#ENSEMBL} and {@link Fields#ANTIBODY}.
 *
 * <p>Values added to {@link Fields#XREFS} follow the pattern
 * {@code "<database>:<accession>, <accession>"}. They are collected from the
 * entry xrefs, from some annotation categories (pathway, disease, small
 * molecule interaction), from the isoform identifiers and from the xrefs
 * attached to the entry publications.
 */
@EntryFieldBuilder
public class XrefFieldBuilder extends FieldBuilder {

    /**
     * Xref property names that carry an additional name worth indexing.
     * Can be found for dbs: "InterPro", "Pfam", "PROSITE", "TIGRFAMs", "SMART", "PRINTS", "HAMAP",
     * "PeroxiBase", "PIRSF", "PIR", "TCDB", "CAZy", "ESTHER", UniPathway.
     */
    private static final String[] EXTRA_NAME_CATEGORIES =
            { "entry name", "family name", "allergen name", "reaction ID", "toxin name" };

    /**
     * Fills the supported fields from the given entry.
     *
     * @param entry the entry whose xrefs, annotations, isoforms and publications are indexed
     */
    @Override
    protected void init(Entry entry) {
        addDbXrefFields(entry);
        addAnnotationXrefFields(entry);
        addIsoformIdFields(entry);
        addPublicationXrefFields(entry);
    }

    /** Indexes the entry xrefs into the ANTIBODY, ENSEMBL and XREFS fields. */
    private void addDbXrefFields(Entry entry) {
        for (DbXref xref : entry.getXrefs()) {
            String acc = xref.getAccession();
            String db = xref.getDatabaseName();

            if (db.equals("neXtProtSubmission")) {
                continue;
            }
            if (db.equals("HPA") && !acc.contains("ENSG")) { // HPA with ENSG are for expression
                addField(Fields.ANTIBODY, acc);
            }
            if (db.equals("Ensembl")) {
                addField(Fields.ENSEMBL, acc);
            }
            if (db.equals("PeptideAtlas") || db.equals("SRMAtlas")) {
                continue; // These are indexed under the 'peptide' field
            }

            if (db.equals("EMBL")) {
                addEmblXrefFields(xref, acc);
            } else {
                addField(Fields.XREFS, formatXrefValue(db, acc));
                // Only the first extra name found (in declaration order) is indexed.
                for (String category : EXTRA_NAME_CATEGORIES) {
                    String extraName = xref.getPropertyValue(category);
                    if (extraName != null) {
                        addField(Fields.XREFS, formatXrefValue(db, extraName));
                        break;
                    }
                }
            }
        }
    }

    /**
     * Indexes an EMBL xref.
     *
     * <p>There is an inconsistency in the way EMBL xref properties are managed:
     * for genomic sequences (EAW78410.1 -> molecule type=protein) the pid appears as an
     * individual xref and the EMBL acc is a property (EAW78410.1 -> genomic sequence ID=CH471052),
     * but for mRNAs (BC040557 -> protein sequence ID=AAH40557.1) the pid is just a property
     * of the xref.
     */
    private void addEmblXrefFields(DbXref xref, String acc) {
        String proteinSequenceId = xref.getPropertyValue("protein sequence ID");
        if (proteinSequenceId != null) {
            // Both the protein sequence id and the accession are searchable as EMBL xrefs.
            addField(Fields.XREFS, formatXrefValue("EMBL", proteinSequenceId));
            addField(Fields.XREFS, formatXrefValue("EMBL", acc));
            return;
        }

        String genomicSequenceId = xref.getPropertyValue("genomic sequence ID");
        // Without any sequence id property, accessions containing a '.' are not indexed.
        if (genomicSequenceId != null || !acc.contains(".")) {
            addField(Fields.XREFS, formatXrefValue("EMBL", acc));
        }
    }

    /**
     * Indexes xref-like values that are only reachable through annotations
     * (pathway names, Orphanet disease names, small molecule generic names).
     *
     * <p>NOTE: indexing of CAB antibodies from annotation evidences is currently disabled;
     * ideally they should come with {@code getXrefs()}.
     */
    private void addAnnotationXrefFields(Entry entry) {
        for (Annotation annotation : entry.getAnnotations()) {
            String category = annotation.getCategory();

            if (category.equals("pathway")) {
                addField(Fields.XREFS, formatXrefValue("Pathway", annotation.getDescription()));
            } else if (category.equals("disease")) {
                DbXref parentXref = annotation.getParentXref();
                if (parentXref != null && parentXref.getDatabaseName().equals("Orphanet")) {
                    String diseaseName = parentXref.getPropertyValue("disease");
                    // getPropertyValue may return null (see the other call sites); without
                    // this guard the literal text "Disease:null, null" would be indexed.
                    if (diseaseName != null) {
                        addField(Fields.XREFS, formatXrefValue("Disease", diseaseName));
                    }
                }
            } else if (category.equals("SmallMoleculeInteraction")) {
                addField(Fields.XREFS, formatXrefValue("generic name", annotation.getDescription()));
            }
        }
    }

    /** Indexes the isoform identifiers under the "isoform ID" pseudo database. */
    private void addIsoformIdFields(Entry entry) {
        for (Isoform isoform : entry.getIsoforms()) {
            // Drops the first 3 characters of the unique name
            // (presumably the "NX_" prefix -- TODO confirm).
            String isoformId = isoform.getUniqueName().substring(3);
            addField(Fields.XREFS, formatXrefValue("isoform ID", isoformId));
        }
    }

    /** Indexes the xrefs attached to the entry publications (PubMed, DOIs). */
    private void addPublicationXrefFields(Entry entry) {
        for (Publication publication : entry.getPublications()) {
            Set<DbXref> publicationXrefs = publication.getDbXrefs();
            for (DbXref publicationXref : publicationXrefs) {
                // It happens to have a trailing \t (like 10.1080/13547500802063240 in NX_P14635)
                String acc = publicationXref.getAccession().trim();
                addField(Fields.XREFS, formatXrefValue(publicationXref.getDatabaseName(), acc));
            }
        }
    }

    /** Formats a value for the XREFS field: {@code "<prefix>:<value>, <value>"}. */
    private static String formatXrefValue(String prefix, String value) {
        return prefix + ":" + value + ", " + value;
    }

    /**
     * @return the index fields filled by this builder
     */
    @Override
    public Collection<Fields> getSupportedFields() {
        return Arrays.asList(Fields.XREFS, Fields.ENSEMBL, Fields.ANTIBODY);
    }
}