package org.genedb.db.loading.auxiliary;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.feature.ModifiedAminoAcidFeature;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.feature.Transcript;
import org.gmod.schema.mapped.CvTerm;
import org.gmod.schema.mapped.Db;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureLoc;
import org.gmod.schema.mapped.FeatureProp;
import org.hibernate.Session;
public class PhosphopeptideLoader extends Loader {
private static final Logger logger = Logger.getLogger(PhosphopeptideLoader.class);
private int n = 0;
private String delimiter = ",";
private boolean delete = false;
private CvTerm propType;
private class Instruction {
private String geneName;
private int position;
Instruction(String line) {
String[] split = line.split(delimiter);
geneName = split[0];
position = Integer.parseInt(split[1]);
}
public String toString() {
return String.format("%s(%s)", geneName, position);
}
}
@Override
protected void doLoad(InputStream inputStream, Session session) throws IOException {
logger.info("Loading .... " + inputStream);
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
String line;
LinkedHashMap<String, List<Instruction>> instructions = new LinkedHashMap<String, List<Instruction>>();
// to catch any format errors prior to loading, preparse the file
while ((line = reader.readLine()) != null) {
logger.info(String.format("%s ... %s ... %s ... %s", line, line.length(), line.contains(delimiter), delimiter));
if (line.length() > 0 && line.contains(delimiter)) {
logger.info(line);
try {
Instruction instruction = new Instruction(line);
if (!instructions.containsKey(instruction.geneName)) {
instructions.put(instruction.geneName, new ArrayList<Instruction>());
}
instructions.get(instruction.geneName).add(instruction);
logger.info(instruction);
} catch (NumberFormatException nfe) {
logger.warn(String.format("Could not extract position from line '%s'", line));
}
}
}
propType = cvDao.getCvTermByNameAndCvName("phosphate binding", "molecular_function");
for (Entry<String, List<Instruction>> geneInstructions : instructions.entrySet()) {
for (Instruction instruction : geneInstructions.getValue()) {
Feature feature = sequenceDao.getFeatureByUniqueName(instruction.geneName, Feature.class);
logger.info(feature);
if (feature == null) {
logger.warn(String.format("%s is not a feature, skipping", instruction.geneName));
continue;
}
if (feature instanceof AbstractGene) {
createOrDestroy(session, instruction, (AbstractGene) feature);
} else if (feature instanceof Transcript) {
createOrDestroy(session, instruction, (Transcript) feature);
} else if (feature instanceof Polypeptide) {
createOrDestroy(session, instruction, (Polypeptide) feature);
} else {
logger.warn(String.format("%s is not a gene model feature, skipping", instruction.geneName));
continue;
}
}
}
session.flush();
session.clear();
}
private void createOrDestroy(Session session, Instruction instruction, AbstractGene gene) {
for (Transcript transcript : gene.getTranscripts()) {
createOrDestroy(session, instruction, transcript);
}
}
private void createOrDestroy(Session session, Instruction instruction, Transcript transcript) {
Polypeptide polypeptide = transcript.getPolypeptide();
if (polypeptide != null) {
createOrDestroy(session, instruction, polypeptide);
} else {
logger.warn(String.format("Could not find a polypeptide on %s", transcript.getUniqueName()));
}
}
private void createOrDestroy(Session session, Instruction instruction, Polypeptide polypeptide) {
boolean found = false;
for (FeatureLoc loc : polypeptide.getAminoAcidFeatureLocs(ModifiedAminoAcidFeature.class)) {
if (loc.getFmin().equals(instruction.position)) {
found = true;
// easiest to delete it when we have hold of the featureloc
if (delete) {
ModifiedAminoAcidFeature maaf = (ModifiedAminoAcidFeature) loc.getFeature();
logger.info(String.format("Deleting %s %s", instruction.geneName, instruction.position));
session.delete(loc);
session.delete(maaf);
// we don't return, there may be more than one at the same position
//return;
}
}
}
// can't find it, instructed not to delete it, therefore must create it
if (!found && !delete) {
logger.info(String.format("Creating %s %s", instruction.geneName, instruction.position));
// Db db = generalDao.getDbByName("Phosphopeptides");
//
// if (db == null) {
// db = new Db("phosphopeptides", "phosphopeptides on GeneDB", delimiter, delimiter);
// }
ModifiedAminoAcidFeature maaf = new ModifiedAminoAcidFeature(polypeptide.getOrganism(), polypeptide.getUniqueName() + ":phosphopeptide:" + instruction.position, false, false, new Timestamp(new Date().getTime()));
FeatureLoc floc = new FeatureLoc(polypeptide, maaf, instruction.position, false, instruction.position, false, (short) 0, 0, 0, 0);
FeatureProp prop = new FeatureProp(maaf, propType, "Phosphopeptide position at " + instruction.position, 0);
session.persist(maaf);
session.persist(floc);
session.persist(prop);
}
if (n % 50 == 1) {
logger.info("Clearing session");
session.flush();
session.clear();
}
n++;
}
@Override
protected Set<String> getOptionNames() {
Set<String> options = new HashSet<String>();
Collections.addAll(options, "delimiter", "delete");
return options;
}
@Override
protected boolean processOption(String optionName, String optionValue) {
logger.info(String.format("Setting option: '%s' :: '%s'", optionName, optionValue));
if (optionName.equals("delimiter")) {
delimiter = optionValue;
return true;
}
if (optionName.equals("delete")) {
delete = Boolean.parseBoolean(optionValue);
return true;
}
return false;
}
}