package org.genedb.db.loading.auxiliary; import org.gmod.schema.feature.Polypeptide; import org.apache.log4j.Logger; import org.hibernate.Session; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; /** * Adds feature properties to the polypeptide feature associated with a list of gene or protein systematic ids * The systematic ids should be listed, one per line in a file supplied on the command line * The type of the features listed (eg gene, polypeptide) can be specified using -Dload.featureType, * but the default is 'polypeptide' (optional) * * The type of the feature_property is specified using -Dload.propType (required) * The value of the feature_property is specified using -Dload.propValue and must be enclosed * in quotes if it contains spaces (required) * * -Dload.errorsAreNotFatal may be used if the file contains deprecated systematic ids (optional) * * Even if the file contains multiple copies of the same systematic id the feature_property will only be * added once per feature per load session * * Example of use: * ant add-featprops -Dconfig=bigtest2 -Dorganism=Pfalciparum -Dload.featureType=gene -Dload.propType=private * -Dload.propValue="Tinas private value" -Dload.errorsAreNotFatal=true -Dfile=featureprop.test * * @author te3 * */ public class AddFeatureProperty extends Loader { private static final Logger logger = Logger.getLogger(AddFeatureProperty.class); Boolean errorsAreNotFatal = true; Collection<String> featuresSeen = new HashSet<String>(); String featureType = "polypeptide"; String propType; String propValue; @Override protected Set<String> getOptionNames() { Set<String> options = new HashSet<String>(); Collections.addAll(options, "featureType", "propType", "propValue", "errorsAreNotFatal"); return options; } @Override protected boolean processOption(String optionName, String optionValue) { if (optionName.equals("featureType")) { featureType = optionValue; return true; } else if (optionName.equals("propType")) { propType = optionValue; return true; } else if (optionName.equals("propValue")) { propValue = optionValue; return true; } else if (optionName.equals("errorsAreNotFatal")) { errorsAreNotFatal = Boolean.valueOf(optionValue); return true; } return false; } public void doLoad(InputStream inputStream, Session session) throws IOException { inputFile file = new inputFile(inputStream); int n=1; for (featProp prop: file.props()) { logger.info(String.format("[%d/%d] Adding feature property for %s '%s'", n++, file.props().size(), featureType, prop.getFeatureUniquename())); loadprop(prop, session); if (n % 50 == 1) { logger.info("Clearing session"); session.flush(); session.clear(); } } } private void loadprop(featProp prop, Session session) { Polypeptide polypeptide; if (featureType.equals("gene")) { polypeptide = getPolypeptideForGene(prop.getFeatureUniquename()); } else if (featureType.equals("polypeptide") || featureType.equals("protein")) { polypeptide = getPolypeptideByMangledName(prop.getFeatureUniquename()); } else { logger.error(String.format("Feature type should be gene or polypeptide not %s", featureType)); return; } if (polypeptide == null) { logger.error(String.format("Could not find polypeptide for key '%s'", prop.getFeatureUniquename())); return; } //Add featureproperties if not already seen this polypeptide if (!featuresSeen.contains(polypeptide.getUniqueName())) { //String type = "curation"; //String value = "GO terms evidenced by RCA were predicted using the Extended Similarity Group software (PMID:18655063)"; //String type2 = "private"; //String value2 = "curator_dpd;date_20090805;feat terms added based on reviewed ESG output"; addCurations(polypeptide, propType, propValue, session); featuresSeen.add(polypeptide.getUniqueName()); } } private void addCurations(Polypeptide polypeptide, String type, String value, Session session) { int rank = polypeptide.getFeaturePropsFilteredByCvNameAndTermName("genedb_misc", type).size() + 1; logger.info(String.format("Adding /%s=\"%s\" with rank %d", type, value, rank)); try { session.persist(polypeptide.addFeatureProp(value, "genedb_misc", type, rank++)); } catch (Exception e) { if (errorsAreNotFatal) { logger.error(String.format("Error adding feature prop to feature '%s'", polypeptide.getUniqueName(), e)); } else { throw new RuntimeException("Error adding feature prop", e); } } } } /* Class corresponding to feat Association file */ class inputFile { private static final Logger logger = Logger.getLogger(inputFile.class); private List<featProp> props = new ArrayList<featProp>(); public inputFile(InputStream inputStream) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); String line; int lineNumber = 0; while (null != (line = reader.readLine())) { //While not end of file if(0 < line.length()){ lineNumber++; StringBuilder sb = new StringBuilder(line); sb.append('\n'); logger.trace(sb); featProp prop = new featProp(lineNumber, line); props.add(prop); } } } public Collection<featProp> props() { return props; } } /* Each featProp corresponds to a line in the input file */ class featProp { private String featureUniquename; private int lineNumber; public featProp(int lineNumber, String row) { this.lineNumber = lineNumber; this.featureUniquename = row; } public int getLineNumber() { return lineNumber; } public String getFeatureUniquename() { return featureUniquename; } }