/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package JochemBuilder.SharedCurationScripts;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.WriteTextFile;
/**
 * Curation step that passes every term of every concept with a positive ID
 * through {@code JochemCurator.rewriteNameForDictionaries} and
 * {@code JochemCurator.rewriteNameForPharmas}, logs each rewrite, and returns
 * a new {@link OntologyStore} holding the resulting concepts together with the
 * relations and database IDs of the original ontology.
 */
public class RemoveDictAndCompanyNamesAtEndOfTerm {
    /** Master switch: when {@code false} neither rewrite rule is applied. */
    boolean rewriteRules = true;
    /** Enables the rewrite performed by {@code JochemCurator.rewriteNameForPharmas}. */
    boolean pharmaceuticalCompanies = true;
    /** Enables the rewrite performed by {@code JochemCurator.rewriteNameForDictionaries}. */
    boolean dictionariesRule = true;

    /**
     * Applies the enabled rewrite rules and builds the curated ontology.
     *
     * <p>The {@link Concept} objects obtained from {@code originalOntology} are
     * modified in place (their terms' {@code text} is overwritten) and the very
     * same objects are stored in the returned ontology.
     * NOTE(review): this presumably also changes what {@code originalOntology}
     * exposes afterwards - confirm against {@code OntologyStore}.
     *
     * @param originalOntology ontology whose concepts, relations and database IDs are processed
     * @param logfilePath      path handed to {@link WriteTextFile}; one line is written per rewrite
     * @return a new ontology containing every concept of the original, the relations whose
     *         subject and object are both among those concepts, and the concepts' database IDs
     */
    public OntologyStore run(OntologyStore originalOntology, String logfilePath) {
        System.out.println("Starting script: "+StringUtilities.now());

        WriteTextFile logFile = new WriteTextFile(logfilePath);
        OntologyStore newOntology = new OntologyStore();
        int rewrittenTermsCount = 0;

        // The log file is closed in the finally block so that it is released
        // even when processing fails half-way through.
        try {
            Set<Integer> includedCUIs = new HashSet<Integer>();

            System.out.println("Rewriting... ");
            Iterator<Concept> conceptIterator = originalOntology.getConceptIterator();
            int lineCount = 0;
            while (conceptIterator.hasNext()) {
                lineCount++;
                if (lineCount % 10000 == 0) {
                    // Progress indicator.
                    System.out.println(lineCount);
                }
                Concept concept = conceptIterator.next();
                // Only concepts with a positive ID have their terms rewritten ...
                if (concept.getID() > 0) {
                    rewrittenTermsCount += rewriteTerms(concept, logFile);
                }
                // ... but every concept is carried over to the new ontology.
                includedCUIs.add(concept.getID());
                newOntology.setConcept(concept);
            }

            // Copy relationships whose both ends were carried over.
            List<Relation> relations = originalOntology.getRelations();
            for (Relation relation : relations) {
                if (includedCUIs.contains(relation.subject) && includedCUIs.contains(relation.object)) {
                    newOntology.setRelation(relation);
                }
            }

            // Copy databaseIDs of the carried-over concepts.
            for (int cui : includedCUIs) {
                List<DatabaseID> databaseIDs = originalOntology.getDatabaseIDsForConcept(cui);
                if (databaseIDs != null) {
                    for (DatabaseID databaseID : databaseIDs) {
                        newOntology.setDatabaseIDForConcept(cui, databaseID);
                    }
                }
            }

            System.out.println("Closing logfile and saving new ontology: "+StringUtilities.now());
        } finally {
            logFile.close();
        }

        System.out.println(rewrittenTermsCount+ " terms were rewritten");
        return newOntology;
    }

    /**
     * Applies the enabled rewrite rules to every term of {@code concept},
     * overwriting {@code term.text} and logging each rewrite.
     *
     * <p>The dictionary rule runs first; the pharma rule then sees the
     * possibly already rewritten text. An empty string returned by a rule is
     * treated as "no rewrite".
     *
     * @param concept concept whose terms are rewritten in place
     * @param logFile log receiving one line per applied rewrite
     * @return number of terms changed by at least one rule
     */
    private int rewriteTerms(Concept concept, WriteTextFile logFile) {
        if (!rewriteRules) {
            return 0;
        }
        int rewrittenTerms = 0;
        for (TermStore term : concept.getTerms()) {
            boolean rewritten = false;
            if (dictionariesRule) {
                String old = term.text;
                String rewrittenTerm = JochemCurator.rewriteNameForDictionaries(term.text);
                if (!rewrittenTerm.equals("")) {
                    term.text = rewrittenTerm;
                    logFile.writeln("REWRITTEN DUE TO DICTIONARY|"+term.text+"|"+ old + "|"+concept.getID());
                    rewritten = true;
                }
            }
            if (pharmaceuticalCompanies) {
                String old = term.text;
                String rewrittenTerm = JochemCurator.rewriteNameForPharmas(term.text);
                if (!rewrittenTerm.equals("")) {
                    term.text = rewrittenTerm;
                    logFile.writeln("REWRITTEN DUE TO PHARMA|"+term.text+"|"+ old + "|"+concept.getID());
                    rewritten = true;
                }
            }
            // Count a term once, whichever rule(s) changed it, so the summary
            // also reflects terms touched only by the pharma rule.
            if (rewritten) {
                rewrittenTerms++;
            }
        }
        return rewrittenTerms;
    }
}