package org.genedb.db.loading.auxiliary;
import org.genedb.db.loading.GoEvidenceCode;
import org.genedb.db.loading.GoInstance;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.mapped.DbXRef;
import org.apache.log4j.Logger;
import org.hibernate.Session;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Loads Gene Ontology annotations from a GO association file and attaches
 * them to polypeptides.
 *
 * <p>Each row of the file (a {@link GOHit}) names either a gene or a protein;
 * the matching polypeptide is looked up and a {@link GoInstance} built from
 * the row is stored via {@code featureUtils.createGoEntries}.
 *
 * <p>Recognised option: {@code go-term-errors-are-not-fatal}. When true (the
 * default) a failure to load a single GO term is logged and the load carries
 * on; when false the failure is rethrown as a {@link RuntimeException}.
 */
public class GOALoader extends Loader {
    private static final Logger logger = Logger.getLogger(GOALoader.class);

    /** Name of the only option this loader understands. */
    private static final String NON_FATAL_OPTION = "go-term-errors-are-not-fatal";

    /** Number of hits processed between two calls to {@link Session#clear()}. */
    private static final int SESSION_CLEAR_INTERVAL = 50;

    /** Whether an error while loading one GO term is logged rather than rethrown. */
    Boolean goTermErrorsAreNotFatal = true;

    /**
     * @return the set of option names accepted by {@link #processOption}
     */
    @Override
    protected Set<String> getOptionNames() {
        Set<String> options = new HashSet<String>();
        Collections.addAll(options, NON_FATAL_OPTION);
        return options;
    }

    /**
     * Applies a single option.
     *
     * @param optionName  the option name
     * @param optionValue the option value; {@code null} is treated as "true"
     *                    for {@code go-term-errors-are-not-fatal}
     * @return true if the option was recognised and applied, false otherwise
     */
    @Override
    protected boolean processOption(String optionName, String optionValue) {
        if (!optionName.equals(NON_FATAL_OPTION)) {
            return false;
        }
        // A bare flag (no value) switches the behaviour on.
        goTermErrorsAreNotFatal = (optionValue == null) ? Boolean.TRUE : Boolean.valueOf(optionValue);
        return true;
    }

    /**
     * Parses the whole association file and loads every hit in file order.
     *
     * @param inputStream stream containing the GO association file
     * @param session     Hibernate session; cleared after every
     *                    {@value #SESSION_CLEAR_INTERVAL} hits
     * @throws IOException if the stream cannot be read
     */
    public void doLoad(InputStream inputStream, Session session) throws IOException {
        GOAssociationFile file = new GOAssociationFile(inputStream);
        Collection<GOHit> hits = file.hits();
        int total = hits.size();
        int processed = 0;
        for (GOHit hit : hits) {
            processed++;
            logger.info(String.format("[%d/%d] Processing GO for '%s'", processed, total, hit.getFeatureUniquename()));
            loadHit(hit);
            // Presumably keeps the session's cache of loaded objects from growing
            // without bound on large files -- TODO confirm.
            if (processed % SESSION_CLEAR_INTERVAL == 0) {
                logger.info("Clearing session");
                session.clear();
            }
        }
    }

    /**
     * Resolves the polypeptide a hit refers to and loads the GO term onto it.
     * Hits of an unsupported feature type, or whose polypeptide cannot be
     * found, are logged as errors and skipped.
     */
    private void loadHit(GOHit hit) {
        String featureType = hit.getFeatureType();
        String uniquename = hit.getFeatureUniquename();

        Polypeptide polypeptide;
        if (featureType.equals("gene")) {
            polypeptide = getPolypeptideForGene(uniquename);
        } else if (featureType.equals("protein")) {
            polypeptide = getPolypeptideByMangledName(uniquename);
        } else {
            logger.error(String.format("Feature '%s' is of type %s not type gene or protein", uniquename, featureType));
            return;
        }

        if (polypeptide == null) {
            logger.error(String.format("Could not find polypeptide for key '%s'", uniquename));
            return;
        }

        // processGO takes all the essential information from a hit and creates
        // the corresponding database entries.
        processGO(polypeptide, hit);
    }

    /**
     * Builds a {@link GoInstance} from the hit and stores it against the
     * polypeptide.
     *
     * @param polypeptide the polypeptide to annotate
     * @param hit         the association-file row describing the GO term
     * @throws RuntimeException if loading fails and
     *                          {@code goTermErrorsAreNotFatal} is false; the
     *                          underlying exception is attached as the cause
     */
    protected void processGO(Polypeptide polypeptide, GOHit hit)
            throws RuntimeException {
        try {
            GoInstance goInstance = new GoInstance();
            goInstance.setId(hit.getGoId());
            goInstance.setDate(hit.getDate());
            goInstance.setAttribution(hit.getCurator());

            String withFrom = hit.getWithFrom();
            if (withFrom != null) {
                goInstance.setWithFrom(withFrom);
            }
            String qualifier = hit.getQualifier();
            if (qualifier != null) {
                goInstance.addQualifier(qualifier);
            }

            try {
                goInstance.setEvidence(GoEvidenceCode.valueOf(hit.getEvCode()));
            } catch (IllegalArgumentException e) {
                // Keep the original exception as the cause so the stack trace survives.
                throw new RuntimeException(String.format(
                        "Failed to parse GO evidence code '%s'", hit.getEvCode()), e);
            }
            goInstance.setRef(hit.getDbxref());

            String comment = "From GO association file";
            DbXRef withFromDbxref = null;
            if (withFrom != null) {
                logger.debug(String.format("Adding withFrom '%s'", withFrom));
                // withFrom is in the format DB:accession
                withFromDbxref = objectManager.getDbXRef(withFrom);
                if (withFromDbxref == null) {
                    throw new RuntimeException(String.format("Error loading GO term: Db is not found for withFrom DbXRef '%s'", withFrom));
                }
            }
            featureUtils.createGoEntries(polypeptide, goInstance, comment, withFromDbxref);
        } catch (Exception e) {
            if (goTermErrorsAreNotFatal) {
                // Pass the exception as the throwable too, so the stack trace is logged.
                logger.error(String.format("Error loading GO term '%s'", e), e);
            } else {
                throw new RuntimeException("Error loading GO term", e);
            }
        }
    }
}
/**
 * In-memory representation of a GO association file: the whole stream is read
 * in the constructor and every data row is turned into a {@link GOHit}.
 *
 * <p>Blank (or whitespace-only) lines and comment lines starting with
 * {@code !} are skipped. Line numbers passed to {@link GOHit} are physical
 * line numbers in the file, starting at 1, so they can be used to locate a
 * problem row in the original file.
 */
class GOAssociationFile {
    private static final Logger logger = Logger.getLogger(GOAssociationFile.class);

    /** Prefix that marks a comment/header line in a GO association file. */
    private static final String COMMENT_PREFIX = "!";

    private final List<GOHit> hits = new ArrayList<GOHit>();

    /**
     * Reads and parses every row of the given stream.
     *
     * <p>The stream is not closed here; it stays the caller's responsibility.
     *
     * @param inputStream the association file contents
     * @throws IOException if reading from the stream fails
     */
    public GOAssociationFile(InputStream inputStream) throws IOException {
        // NOTE(review): the reader decodes with the platform default charset.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        String line;
        int lineNumber = 0;
        while ((line = reader.readLine()) != null) {
            // Count every physical line so the number matches the file on disk.
            lineNumber++;
            if (line.trim().length() == 0 || line.startsWith(COMMENT_PREFIX)) {
                continue;
            }
            if (logger.isTraceEnabled()) {
                logger.trace(line + '\n');
            }
            hits.add(new GOHit(lineNumber, line));
        }
    }

    /**
     * @return the parsed rows, in file order
     */
    public Collection<GOHit> hits() {
        return hits;
    }
}
/**
 * One data row ("hit") of a GO association file.
 *
 * <p>Optional columns (qualifier, with/from, object name, object synonym)
 * are reported as {@code null} by their getters when the column is empty.
 */
class GOHit {
    //File format for GO association file
    // 1 DB required 1 SGD
    // 2 DB_Object_ID required 1 S000000296
    // 3 DB_Object_Symbol required 1 PHO3
    // 4 Qualifier optional 0 or greater NOT
    // 5 GO ID required 1 GO:0003993
    // 6 DB:Reference (|DB:Reference) required 1 or greater SGD_REF:S000047763|PMID:2676709
    // 7 Evidence code required 1 IMP
    // 8 With (or) From optional 0 or greater GO:0000346
    // 9 Aspect required 1 F
    // 10 DB_Object_Name optional 0 or 1 acid phosphatase
    // 11 DB_Object_Synonym (|Synonym) optional 0 or greater YBR092C
    // 12 DB_Object_Type required 1 gene
    // 13 taxon(|taxon) required 1 or 2 taxon:4932
    // 14 Date required 1 20010118
    // 15 Assigned_by required 1 SGD

    // Zero-based indexes of the columns listed above:
    private static final int DB = 0;
    private static final int DB_OBJECT_ID = 1;
    private static final int DB_OBJECT_SYMBOL = 2;
    private static final int QUALIFIER = 3;
    private static final int GO_ID = 4;
    private static final int DBXREF = 5;
    private static final int EVIDENCE_CODE = 6;
    private static final int WITH_FROM = 7;
    private static final int ASPECT = 8;
    private static final int DB_OBJECT_NAME = 9;
    private static final int DB_OBJECT_SYNONYM = 10;
    private static final int DB_OBJECT_TYPE = 11;
    private static final int TAXON = 12;
    private static final int DATE = 13;
    private static final int ASSIGNED_BY = 14;

    /** Minimum number of columns a row must have to be parsed. */
    private static final int REQUIRED_FIELD_COUNT = ASSIGNED_BY + 1;

    private final String featureDb, featureUniquename, featureSymbol, qualifier, goId,
            dbxref, evCode, withFrom, aspect, featureProduct, featureSynonym,
            featureType, taxon, date, curator;
    private final int lineNumber;

    /**
     * Parses a raw, tab-separated row.
     *
     * @param lineNumber line number of the row in the file, used in error messages
     * @param row        the raw line, without its line terminator
     * @throws IllegalArgumentException if the row is null or has fewer than 15 columns
     */
    public GOHit(int lineNumber, String row) {
        this(lineNumber, splitRow(lineNumber, row));
    }

    /**
     * Builds a hit from already-split columns.
     *
     * @param lineNumber line number of the row in the file, used in error messages
     * @param rowFields  the columns of the row; extra columns are ignored
     * @throws IllegalArgumentException if rowFields is null or has fewer than 15 elements
     */
    public GOHit(int lineNumber, String[] rowFields) {
        if (rowFields == null || rowFields.length < REQUIRED_FIELD_COUNT) {
            throw new IllegalArgumentException(String.format(
                    "Malformed GO association row at line %d: expected at least %d tab-separated fields but found %d",
                    lineNumber, REQUIRED_FIELD_COUNT, rowFields == null ? 0 : rowFields.length));
        }
        this.lineNumber = lineNumber;
        this.featureDb = rowFields[DB];
        this.featureUniquename = rowFields[DB_OBJECT_ID];
        this.featureSymbol = rowFields[DB_OBJECT_SYMBOL];
        this.qualifier = rowFields[QUALIFIER];
        this.goId = rowFields[GO_ID];
        this.dbxref = rowFields[DBXREF];
        this.evCode = rowFields[EVIDENCE_CODE];
        this.withFrom = rowFields[WITH_FROM];
        this.aspect = rowFields[ASPECT];
        this.featureProduct = rowFields[DB_OBJECT_NAME];
        this.featureSynonym = rowFields[DB_OBJECT_SYNONYM];
        this.featureType = rowFields[DB_OBJECT_TYPE];
        this.taxon = rowFields[TAXON];
        this.date = rowFields[DATE];
        this.curator = rowFields[ASSIGNED_BY];
    }

    /** Splits a raw row on tabs, keeping trailing empty columns. */
    private static String[] splitRow(int lineNumber, String row) {
        if (row == null) {
            throw new IllegalArgumentException(String.format(
                    "Malformed GO association row at line %d: row is null", lineNumber));
        }
        // A negative limit stops split() from discarding trailing empty strings,
        // so a row whose last columns are empty still yields all its columns.
        return row.split("\t", -1);
    }

    /** Maps an empty column to null; any other value is returned unchanged. */
    private static String emptyToNull(String value) {
        return "".equals(value) ? null : value;
    }

    /** @return the line number supplied at construction */
    public int getLineNumber() {
        return lineNumber;
    }

    /** @return column 1, DB */
    public String getFeatureDb() {
        return featureDb;
    }

    /** @return column 2, DB_Object_ID */
    public String getFeatureUniquename() {
        return featureUniquename;
    }

    /** @return column 3, DB_Object_Symbol */
    public String getFeatureSymbol() {
        return featureSymbol;
    }

    /** @return column 10, DB_Object_Name, or null if the column is empty */
    public String getFeatureProduct() {
        return emptyToNull(featureProduct);
    }

    /** @return column 11, DB_Object_Synonym, or null if the column is empty */
    public String getFeatureSynonym() {
        return emptyToNull(featureSynonym);
    }

    /** @return column 12, DB_Object_Type */
    public String getFeatureType() {
        return featureType;
    }

    /** @return column 4, Qualifier, or null if the column is empty */
    public String getQualifier() {
        return emptyToNull(qualifier);
    }

    /** @return column 5, GO ID */
    public String getGoId() {
        return goId;
    }

    /** @return column 7, Evidence code */
    public String getEvCode() {
        return evCode;
    }

    /** @return column 6, DB:Reference */
    public String getDbxref() {
        return dbxref;
    }

    /** @return column 8, With (or) From, or null if the column is empty */
    public String getWithFrom() {
        return emptyToNull(withFrom);
    }

    /** @return column 9, Aspect */
    public String getAspect() {
        return aspect;
    }

    /** @return column 15, Assigned_by */
    public String getCurator() {
        return curator;
    }

    /** @return column 14, Date */
    public String getDate() {
        return date;
    }

    /** @return column 13, taxon */
    public String getTaxon() {
        return taxon;
    }
}