package org.genedb.crawl.elasticsearch.index.gff;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.genedb.crawl.elasticsearch.index.gff.GFFFeature.GFFAttributeMap;
import org.genedb.crawl.elasticsearch.index.gff.GFFFeature.GFFAttributeMapList;
import org.genedb.crawl.model.Coordinates;
import org.genedb.crawl.model.Cv;
import org.genedb.crawl.model.Cvterm;
import org.genedb.crawl.model.Db;
import org.genedb.crawl.model.Dbxref;
import org.genedb.crawl.model.Feature;
import org.genedb.crawl.model.Property;
import org.genedb.crawl.model.LocatedFeature;
import org.genedb.crawl.model.Organism;
import org.genedb.crawl.model.Orthologue;
import org.genedb.crawl.model.Pub;
/**
 * Builds a {@link LocatedFeature} bean from a single line of GFF text.
 *
 * <p>The line is parsed with {@link GFFFeature}; its location columns are copied onto the
 * feature (both as flat fields and as a single top-level {@link Coordinates} entry) and its
 * attributes are mapped onto orthologues, GO terms, dbxrefs, products and generic
 * {@link Property} entries. The finished bean is available through {@link #getFeature()}.
 */
public class FeatureBeanFactory {

    private static final Logger logger = Logger.getLogger(FeatureBeanFactory.class);

    private LocatedFeature feature;

    /**
     * @return the feature that was built by the constructor.
     */
    public LocatedFeature getFeature() {
        return feature;
    }

    /**
     * Parses {@code line} and populates a new {@link LocatedFeature}.
     *
     * @param organism the organism whose {@code ID} is stored on the feature
     * @param line     one line of GFF text, handed to {@link GFFFeature} for parsing
     */
    public FeatureBeanFactory(Organism organism, String line) {

        GFFFeature gffFeature = new GFFFeature(line);

        feature = new LocatedFeature();
        feature.organism_id = organism.ID;
        feature.uniqueName = gffFeature.id;

        // for easy querying we are storing the locations of this feature as properties
        feature.fmin = gffFeature.start;
        feature.fmax = gffFeature.end;
        feature.region = gffFeature.seqid;
        // phase and strand are guarded individually so that a missing value leaves the
        // field at its default instead of throwing a NullPointerException
        if (gffFeature.phase != null) {
            feature.phase = gffFeature.phase.getPhase();
        }
        if (gffFeature.strand != null) {
            feature.strand = gffFeature.strand.getStrandInt();
        }

        // but for web service display we are also storing a coordinates array
        Coordinates coordinates = new Coordinates();
        feature.coordinates = new ArrayList<Coordinates>();
        feature.coordinates.add(coordinates);
        coordinates.region = gffFeature.seqid;
        coordinates.toplevel = true;
        if (gffFeature.phase != null) {
            coordinates.phase = gffFeature.phase.getPhase();
        }
        if (gffFeature.strand != null) {
            coordinates.strand = gffFeature.strand.getStrandInt();
        }
        coordinates.fmin = gffFeature.start;
        coordinates.fmax = gffFeature.end;

        Cvterm type = new Cvterm();
        type.name = gffFeature.type;
        feature.type = type;

        feature.properties = new ArrayList<Property>();

        for (Entry<String, Object> entry : gffFeature.attributes.map.entrySet()) {
            String key = entry.getKey();
            Object value = entry.getValue();

            if (value instanceof GFFAttributeMapList) {
                GFFAttributeMapList submap = (GFFAttributeMapList) value;
                if (key.equals("orthologous_to")) {
                    addOrthologues(submap);
                } else if (key.equals("go")) {
                    addGoTerms(submap);
                } else {
                    addSubProperties(key, submap);
                }
            } else {
                applySimpleAttribute(key, (String) value);
            }
        }
    }

    /**
     * Converts the sub-attributes of an {@code orthologous_to} attribute into
     * {@link Orthologue} entries on the feature.
     */
    private void addOrthologues(GFFAttributeMapList submap) {
        logger.debug("Scanning orthologues for " + feature.uniqueName);

        if (feature.orthologues == null) {
            feature.orthologues = new ArrayList<Orthologue>();
        }

        // Deliberately declared outside both loops: sub-attributes attach to the most
        // recently created orthologue, as in the original implementation.
        Orthologue orthologue = null;

        for (GFFAttributeMap submapitem : submap.list) {
            for (Entry<String, Object> subattr : submapitem.map.entrySet()) {
                String subkey = subattr.getKey();
                Object rawValue = subattr.getValue();
                logger.debug(String.format("Orthologue %s >> %s.", subkey, rawValue));

                // same rule as the generic sub-attribute branch: only string values are usable
                if (!(rawValue instanceof String)) {
                    continue;
                }
                String subvalue = (String) rawValue;

                // a new orthologue must be generated for each link.
                if (subkey.contains("link")) {
                    orthologue = new Orthologue();
                    feature.orthologues.add(orthologue);
                    String[] vals = subvalue.split(" ");
                    if (vals.length == 2) {
                        orthologue.uniqueName = vals[0];
                        if (vals[1].contains("type=")) {
                            orthologue.orthologyType = vals[1].replace("type=", "");
                        }
                    } else if (vals.length > 2) {
                        orthologue.uniqueName = vals[1];
                    } else if (vals.length == 1) {
                        // a single token used to raise ArrayIndexOutOfBoundsException;
                        // NOTE(review): assumes a lone token is the unique name - confirm
                        orthologue.uniqueName = vals[0];
                    }
                    // vals.length == 0 (value made only of spaces): leave the name unset
                } else if (subkey.equals("cluster_name")
                        || subkey.equals("program")
                        || subkey.equals("product")) {
                    if (orthologue == null) {
                        // no link has been seen yet, so there is nothing to attach this to
                        logger.warn(String.format(
                                "Ignoring orthologue attribute %s on %s: no link seen yet.",
                                subkey, feature.uniqueName));
                        continue;
                    }
                    if (subkey.equals("cluster_name")) {
                        orthologue.clusterName = subvalue;
                    } else if (subkey.equals("program")) {
                        orthologue.program = subvalue;
                    } else {
                        orthologue.addProduct(subvalue);
                    }
                }
            }
        }
    }

    /**
     * Converts the sub-attributes of a {@code go} attribute into {@link Cvterm} entries on
     * the feature. An {@code aspect} sub-attribute starts a new term; {@code GOid},
     * {@code term} and {@code db_xref} fill in the term most recently started.
     */
    private void addGoTerms(GFFAttributeMapList submap) {
        Cvterm cvterm = null;

        for (GFFAttributeMap submapitem : submap.list) {
            for (Entry<String, Object> subattr : submapitem.map.entrySet()) {
                String subkey = subattr.getKey();
                if (!(subattr.getValue() instanceof String)) {
                    continue;
                }
                String subattrval = (String) subattr.getValue();

                if (subkey.equals("aspect")) {
                    cvterm = new Cvterm();
                    feature.addTerm(cvterm);
                    Cv cv = new Cv();
                    if (subattrval.equals("P")) {
                        cv.name = "biological_process";
                    } else if (subattrval.equals("C")) {
                        cv.name = "cellular_component";
                    } else if (subattrval.equals("F")) {
                        cv.name = "molecular_function";
                    }
                    cvterm.cv = cv;
                } else if (subkey.equals("GOid")
                        || subkey.equals("term")
                        || subkey.equals("db_xref")) {
                    if (cvterm == null) {
                        // no aspect has been seen yet, so no term exists to receive this value
                        logger.warn(String.format(
                                "Ignoring GO attribute %s on %s: no aspect seen yet.",
                                subkey, feature.uniqueName));
                        continue;
                    }
                    if (subkey.equals("GOid")) {
                        if (subattrval.contains("GO:")) {
                            cvterm.accession = subattrval.replace("GO:", "");
                        } else {
                            cvterm.accession = subattrval;
                        }
                    } else if (subkey.equals("term")) {
                        cvterm.name = subattrval;
                    } else {
                        Pub pub = new Pub();
                        pub.accession = subattrval;
                        pub.database = "pubmed";
                        pub.uniqueName = subattrval;
                        cvterm.addPub(pub);
                    }
                }
            }
        }
    }

    /**
     * Flattens every string-valued sub-attribute of {@code submap} into a {@link Property}
     * named {@code key.subkey}.
     */
    private void addSubProperties(String key, GFFAttributeMapList submap) {
        for (GFFAttributeMap submapitem : submap.list) {
            for (Entry<String, Object> subattr : submapitem.map.entrySet()) {
                if (subattr.getValue() instanceof String) {
                    String subvalue = (String) subattr.getValue();
                    logger.debug(String.format("Subattribute %s : %s.", subattr.getKey(), subvalue));
                    Property fp = new Property();
                    fp.name = key + "." + subattr.getKey();
                    fp.value = subvalue;
                    feature.properties.add(fp);
                }
            }
        }
    }

    /**
     * Applies one plain (non-list) attribute to the feature. Keys without dedicated handling
     * are stored as a generic {@link Property}.
     */
    private void applySimpleAttribute(String key, String stringValue) {
        if (key.equals("derives_from") || key.equals("parent") || key.equals("part_of")) {
            feature.parent = stringValue;
            // a "parent" attribute is recorded as a part_of relationship
            if (key.equals("parent")) {
                feature.parentRelationshipType = "part_of";
            } else {
                feature.parentRelationshipType = key;
            }
            logger.trace(String.format("Adding %s as a parent of %s, relationship %s.",
                    stringValue, feature.uniqueName, key));
        } else if (key.equals("dbxref")) {
            // comma separated list of db:accession pairs; entries of any other shape are skipped
            for (String ref : stringValue.split(",")) {
                String[] refSplit = ref.split(":");
                if (refSplit.length == 2) {
                    Db db = new Db();
                    db.name = refSplit[0];
                    Dbxref dbxref = new Dbxref();
                    dbxref.accession = refSplit[1];
                    dbxref.db = db;
                    feature.addDbxref(dbxref);
                }
            }
        } else if (key.equals("product")) {
            if (stringValue.contains("term=")) {
                feature.addProduct(stringValue.replace("term=", ""));
            } else {
                feature.addProduct(stringValue);
            }
        } else if (key.equals("timelastmodified")) {
            try {
                // NOTE(review): "hh" is the 12-hour clock field; if the input is written in
                // 24-hour time this should probably be "HH" - confirm against the producer.
                SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy hh:mm:ss z");
                Date date = df.parse(stringValue);
                feature.timelastmodified = date;
            } catch (ParseException e) {
                // an unparseable date is logged and the field is left unset
                logger.error("Could not parse date : " + stringValue, e);
            }
        } else if (key.equals("isobsolete")) {
            feature.isObsolete = Boolean.parseBoolean(stringValue);
        } else if (key.equals("translation")) {
            feature.residues = stringValue;
        } else {
            Property fp = new Property();
            fp.name = key;
            fp.value = stringValue;
            feature.properties.add(fp);
        }
    }
}