package org.genedb.db.loading.auxiliary;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.mapped.CvTerm;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureSynonym;
import org.gmod.schema.mapped.Synonym;
import org.hibernate.Session;
/**
 * A bulk feature renaming utility. Takes a delimited file as input. The first column must
 * contain the old unique names and the second column must contain the new unique names.
 * <p>
 * Only features that are instances of {@link AbstractGene} are renamed. When a feature is
 * renamed, its old unique name is stored as a <code>previous_systematic_id</code> synonym.
 * <p>
 * Supported options:
 * <ul>
 * <li><code>delimiter</code> - the column separator (default: a tab). It is passed to
 *     {@link String#split(String)} and is therefore interpreted as a regular expression.</li>
 * <li><code>matchPrefixOnly</code> - when <code>true</code>, the old name is treated as a
 *     prefix: every feature whose unique name matches <code>oldName%</code> is renamed by
 *     swapping that prefix for the new name.</li>
 * </ul>
 *
 * @author gv1
 *
 */
public class RenameFeature extends Loader {

    private static final Logger logger = Logger.getLogger(RenameFeature.class);

    /** Column separator of the input file; used as a regular expression by String.split. */
    private String delimiter = "\t";

    /** Whether the old name in the input file is a prefix pattern rather than an exact name. */
    private boolean matchPrefixOnly = false;

    /** New unique names already handed out during this run; used to refuse duplicate targets. */
    private final Set<String> newUniqueNames = new HashSet<String>();

    /*
     * Used the following for testing :
     *
        --cleanup with.... (in my test db, names starting with test are expendables)
        delete from feature where uniquename like 'test%';
        delete from synonym where name like 'test%';
        --ready for testing with...
        insert into feature (uniquename, organism_id, type_id) values ('test1', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test2', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test3', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test4', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test4:pep', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test4:pep.1', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('test4:mRNA.1', 213, 792);
        insert into feature (uniquename, organism_id, type_id) values ('testX', 213, 792);
        insert into synonym
            (name, synonym_sgml, type_id)
        values
            ('test4', 'test4', (select cvterm_id from cvterm where name = 'previous_systematic_id'));
        insert into feature_synonym
            (synonym_id,
            feature_id,
            pub_id,
            is_current)
        values
            (
            (select synonym_id from synonym where name = 'test4'),
            (select feature_id from feature where uniquename = 'testX'),
            1,
            false
            );
     * And two testing text files containing :
     *
        test1,test123
        test2,test231
        test3,test333
        test4,test412
     *
     * and (for undo) :
     *
        test123,test1
        test231,test2
        test333,test3
        test412,test4
     *
     * Note: these sample files are comma-separated, whereas the default delimiter is a tab,
     * so the 'delimiter' option has to be set to ',' when loading them.
     */

    /**
     * Reads the rename file line by line and renames the matching features.
     * <p>
     * Empty lines are ignored and lines with fewer than two columns are skipped with a warning.
     * In prefix mode a pattern that matches no feature is logged and skipped; the remaining
     * lines are still processed.
     *
     * @param inputStream the delimited rename file
     * @param session the Hibernate session used to persist the changes
     * @throws IOException if the input cannot be read
     * @throws RuntimeException if the old or the new name on a line is empty
     */
    @Override
    protected void doLoad(InputStream inputStream, Session session)
            throws IOException {
        final CvTerm previousSystematicIdType = cvDao.getCvTermByNameAndCvName("previous_systematic_id", "genedb_synonym_type");

        // NOTE(review): the reader is not closed here; presumably the caller owns inputStream - confirm.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));

        String line;
        int n = 1;
        while ((line = reader.readLine()) != null) {
            if (line.length() > 0) {
                String[] split = line.split(delimiter);
                if (split.length < 2) {
                    logger.warn(String.format("Skipping line with fewer than two columns : %s.", line));
                    continue;
                }

                String oldUniqueName = split[0];
                String newUniqueName = split[1];
                if (oldUniqueName.length() < 1 || newUniqueName.length() < 1) {
                    throw new RuntimeException(String.format("Error on this line : %s.", line));
                }

                logger.info(String.format("%d CONVERSION PATTERN: '%s' -> '%s'", n++, oldUniqueName, newUniqueName));

                if (matchPrefixOnly) {
                    String namePattern = oldUniqueName + "%";
                    List<Feature> features = sequenceDao.getFeaturesByUniqueNamePattern(namePattern);
                    if (features.isEmpty()) {
                        // Only this pattern is skipped; the rest of the file must still be processed.
                        logger.warn("No matching features found.");
                    }
                    for (Feature feature : features) {
                        String newFeatureName = replacePrefix(feature.getUniqueName(), oldUniqueName, newUniqueName);
                        renameFeatureAndStorePreviousSystematicIds(session, previousSystematicIdType, feature, newFeatureName);
                    }
                } else {
                    Feature feature = sequenceDao.getFeatureByUniqueName(oldUniqueName, Feature.class);
                    renameFeatureAndStorePreviousSystematicIds(session, previousSystematicIdType, feature, newUniqueName);
                }
            }

            // n is incremented once per conversion pattern, so this triggers after every 50th pattern
            // (and, harmlessly, on empty lines read while n still has such a value).
            if (n % 50 == 1) {
                logger.info("Clearing session");
                session.flush();
                session.clear();
            }
        }
    }

    /**
     * Builds the new name of a feature found by a prefix pattern.
     * <p>
     * Only the leading occurrence of the old prefix is replaced, so later occurrences of the same
     * text inside the name are left alone. How the DAO matches the pattern (wildcards, case) is
     * not visible here, so if the name does not literally start with the prefix this falls back
     * to replacing every occurrence, as the code did previously.
     *
     * @param featureName the current unique name of the feature
     * @param oldPrefix the prefix to be replaced
     * @param newPrefix the replacement prefix
     * @return the new unique name
     */
    private static String replacePrefix(String featureName, String oldPrefix, String newPrefix) {
        if (featureName.startsWith(oldPrefix)) {
            return newPrefix + featureName.substring(oldPrefix.length());
        }
        return featureName.replace(oldPrefix, newPrefix);
    }

    /**
     * Renames a single feature and records its old unique name as a previous systematic ID.
     * <p>
     * Nothing is changed if the feature is <code>null</code>, if it is not an
     * {@link AbstractGene}, or if the new name has already been used as a target in this run.
     * If the new name is currently one of the feature's previous systematic IDs, that link is
     * removed, and the synonym itself is deleted when no other feature synonym refers to it.
     *
     * @param session the Hibernate session used to persist the changes
     * @param previousSystematicIdType the synonym type used for previous systematic IDs
     * @param feature the feature to rename, may be <code>null</code> if it was not found
     * @param newUniqueName the unique name to give to the feature
     */
    private void renameFeatureAndStorePreviousSystematicIds(Session session, CvTerm previousSystematicIdType, Feature feature, String newUniqueName) {

        // Must come before the instanceof test: 'null instanceof X' is false, which would
        // otherwise make a missing feature disappear without any message.
        if (feature == null) {
            logger.error("Could not find feature, skipping");
            return;
        }

        if (! (feature instanceof AbstractGene)) {
            return;
        }

        // Set.add returns false when the name was already present.
        if (! newUniqueNames.add(newUniqueName)) {
            logger.error(String.format("The newUniqueName '%s' has already been encountered. Skipping.", newUniqueName));
            return;
        }

        final String oldUniqueName = feature.getUniqueName();
        logger.info(String.format("Renaming: '%s' -> '%s'", oldUniqueName, newUniqueName));

        feature.setUniqueName(newUniqueName);

        Collection<String> previous = feature.getPreviousSystematicIds();
        logger.info("Previous Systematic IDS : " + previous);

        // add the old name to the previous ID list if it's not there...
        if (! previous.contains(oldUniqueName)) {
            logger.info("Storing '" + oldUniqueName + "' as a previous systematic ID");
            Synonym synonym = getOrCreateSynonym(session, oldUniqueName, previousSystematicIdType);
            FeatureSynonym featureSynonym = feature.addSynonym(synonym);
            // must be set to true for it to be shown on genedb
            featureSynonym.setCurrent(true);
            session.persist(featureSynonym);
        } else {
            logger.info("Already a previous systematic ID: '" + oldUniqueName + "'");
        }

        // This is the funny case where the newly supplied name is actually in the list
        // of previous systematic IDs. We must try to remove it from the list, but only
        // if we find that it's not used by other features.
        if (previous.contains(newUniqueName)) {
            // loop through all the synonyms
            for (FeatureSynonym featureSynonym : feature.getFeatureSynonyms()) {
                Synonym synonym = featureSynonym.getSynonym();

                // only interested in previous systematic IDs
                if (! synonym.getType().equals(previousSystematicIdType)) {
                    continue;
                }

                // if there is a match, let's see if we can delete it
                if (synonym.getName().equals(newUniqueName)) {
                    logger.warn("Removing the link between the feature and the synonym (as the synonym is now the current name)");

                    // first delete the feature synonym that links them
                    session.delete(featureSynonym);

                    // We want to check to see if there are any other feature synonyms associated
                    // with this synonym. If there aren't any then it's safe to delete.
                    // We flush here to make sure the count is correct.
                    session.flush();

                    if (synonym.getFeatureSynonyms().size() == 0) {
                        logger.warn("Removing '" + newUniqueName + "' from the synonyms list (as it's now the current name, and no other features link to it)");
                        session.delete(synonym);
                    } else {
                        logger.warn("Not deleting the synonym as it's still being used by another feature");
                    }
                }
            }
        }

        session.update(feature);
        logger.info("New name: '" + feature.getUniqueName() + "'");
    }

    /**
     * Checks to see if a synonym has already been created; returns it if it has, and creates
     * and persists a new one if it hasn't.
     *
     * @param session the Hibernate session used to persist a newly created synonym
     * @param name the name of the synonym
     * @param type the type of the synonym
     * @return a synonym of the specified type and name.
     */
    private Synonym getOrCreateSynonym(Session session, String name, CvTerm type) {
        Synonym syn = sequenceDao.getSynonymByNameAndCvTerm(name, type);
        if (syn == null) {
            logger.info ("Creating new synonym");
            syn = new Synonym();
            syn.setName(name);
            syn.setSynonymSGML(name);
            syn.setType(type);
            session.persist(syn);
        } else {
            logger.info ("Reusing synonym");
        }
        return syn;
    }

    /**
     * Lists the options understood by this loader.
     *
     * @return the set containing "delimiter" and "matchPrefixOnly"
     */
    @Override
    protected Set<String> getOptionNames() {
        Set<String> options = new HashSet<String>();
        Collections.addAll(options, "delimiter", "matchPrefixOnly");
        return options;
    }

    /**
     * Applies a single option to this loader.
     *
     * @param optionName the name of the option
     * @param optionValue the value of the option; for "matchPrefixOnly" only the exact
     *        string "true" enables prefix matching
     * @return <code>true</code> if the option was recognised, <code>false</code> otherwise
     */
    @Override
    protected boolean processOption(String optionName, String optionValue) {
        logger.info(String.format("Setting option: '%s' :: '%s'", optionName, optionValue));

        if ("delimiter".equals(optionName)) {
            delimiter = optionValue;
            return true;
        }

        if ("matchPrefixOnly".equals(optionName)) {
            // Constant-first comparison so a missing value means "false" rather than an NPE.
            matchPrefixOnly = "true".equals(optionValue);
            return true;
        }

        return false;
    }
}