/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package JochemBuilder.SharedCurationScripts;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.WriteTextFile;
/**
 * Curation step that suppresses terms matching a fixed set of textual patterns and
 * lower-cases longer terms that are written entirely in upper case.
 */
public class RewriteFurther {

    /** A term whose text starts with any of these prefixes is suppressed. */
    private static final String[] SUPPRESSED_PREFIXES = {
        "CID", "ZINC0", "AIDS", "MOLI", "N/A", "MLS0", "SMR0", "UgiM1_"
    };

    /** A term whose text contains this fragment anywhere is suppressed. */
    private static final String SUPPRESSED_FRAGMENT = "Beilstein Handbook Reference";

    /**
     * Builds a new ontology from {@code originalOntology} after curating the terms of
     * every concept with a positive ID.
     *
     * <p>For each such concept, terms matching a suppression pattern are removed; each
     * remaining term that has more than two words or more than ten characters, and whose
     * words are all reported as fully upper case, is replaced by its lower-case form.
     * Duplicate terms are then removed. A concept is carried over when it still has at
     * least one term or when its ID is negative. Relations are copied only when both
     * their subject and object were carried over, and database IDs are copied for every
     * carried-over concept.
     *
     * <p>Every suppression and rewrite is written to the log file, which is closed before
     * this method returns, also when an exception is thrown. Progress and summary counts
     * are printed to standard output.
     *
     * <p>Note: the {@code Concept} objects obtained from {@code originalOntology} are
     * modified and reused in the result rather than copied.
     *
     * @param originalOntology the ontology to curate
     * @param logfilePath path of the log file to write
     * @return the newly built ontology
     */
    public OntologyStore run(OntologyStore originalOntology, String logfilePath) {
        System.out.println("Starting script: " + StringUtilities.now());

        OntologyStore newOntology = new OntologyStore();
        Set<Integer> includedCUIs = new HashSet<Integer>();
        int rewrittenTermsCount = 0;
        int suppressedTermsCount = 0;

        WriteTextFile logFile = new WriteTextFile(logfilePath);
        try {
            System.out.println("Rewriting... ");
            Iterator<Concept> conceptIterator = originalOntology.getConceptIterator();
            int lineCount = 0;
            while (conceptIterator.hasNext()) {
                lineCount++;
                if (lineCount % 10000 == 0) {
                    System.out.println(lineCount);
                }
                Concept concept = conceptIterator.next();

                // Only concepts with a positive ID have their terms curated.
                if (concept.getID() > 0) {
                    List<TermStore> terms = concept.getTerms();
                    Iterator<TermStore> termIterator = terms.iterator();
                    while (termIterator.hasNext()) {
                        TermStore term = termIterator.next();
                        if (isSuppressed(term.text)) {
                            suppressedTermsCount++;
                            termIterator.remove();
                            logFile.writeln("SUPPRESSED: " + term.text);
                        } else {
                            List<String> words = StringUtilities.mapToWords(term.text);
                            boolean longEnough = words.size() > 2 || term.text.length() > 10;
                            if (longEnough && isAllUpperCase(words)) {
                                // Locale.ROOT keeps the result independent of the JVM's
                                // default locale (e.g. Turkish dotless-i mapping).
                                String lowered = term.text.toLowerCase(Locale.ROOT);
                                logFile.writeln("REWRITTEN " + term.text + " TO: " + lowered);
                                term.text = lowered;
                                rewrittenTermsCount++;
                            }
                        }
                    }
                    concept.setTerms(terms);
                    OntologyUtilities.removeDuplicateTerms(concept.getTerms());
                }

                // Keep concepts that still have terms, plus all negative-ID concepts.
                if (!concept.getTerms().isEmpty() || concept.getID() < 0) {
                    includedCUIs.add(concept.getID());
                    newOntology.setConcept(concept);
                }
            }

            // Copy relationships whose two endpoints were both kept.
            List<Relation> relations = originalOntology.getRelations();
            for (Relation relation : relations) {
                if (includedCUIs.contains(relation.subject)
                        && includedCUIs.contains(relation.object)) {
                    newOntology.setRelation(relation);
                }
            }

            // Copy database IDs of every kept concept.
            for (int cui : includedCUIs) {
                List<DatabaseID> databaseIDs = originalOntology.getDatabaseIDsForConcept(cui);
                if (databaseIDs != null) {
                    for (DatabaseID databaseID : databaseIDs) {
                        newOntology.setDatabaseIDForConcept(cui, databaseID);
                    }
                }
            }

            System.out.println("Closing logfile and saving new ontology: " + StringUtilities.now());
        } finally {
            // Release the log file even if curation failed part-way.
            logFile.close();
        }

        System.out.println(rewrittenTermsCount + " terms were rewritten");
        System.out.println(suppressedTermsCount + " terms were suppressed");
        return newOntology;
    }

    /** Returns whether {@code text} starts with a suppressed prefix or contains the suppressed fragment. */
    private static boolean isSuppressed(String text) {
        for (String prefix : SUPPRESSED_PREFIXES) {
            if (text.startsWith(prefix)) {
                return true;
            }
        }
        return text.contains(SUPPRESSED_FRAGMENT);
    }

    /**
     * Returns whether every word has an upper-case character count equal to its length.
     * An empty list yields {@code true}.
     */
    private static boolean isAllUpperCase(List<String> words) {
        for (String word : words) {
            if (StringUtilities.countsCharactersInUpperCase(word) != word.length()) {
                return false;
            }
        }
        return true;
    }
}