/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package casperSoftwareCode;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.erasmusmc.collections.Pair;
import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities;
/**
 * Applies the rewrite and suppress rules from {@link Rules} to {@link CasperConcept}s.
 *
 * <p>A rewrite rule yields a new concept only when the rewritten term passes three checks,
 * evaluated in this order:
 * <ol>
 *   <li>it does not equal any term already listed for the concept's CUI in
 *       {@link #cuisWithTerms};</li>
 *   <li>the same rule has not already produced the same term for that CUI
 *       (tracked in {@link #cuisWithRuleNo});</li>
 *   <li>it is not already contained in {@link #allTermsInUMLS}.</li>
 * </ol>
 * The third check registers the term in {@link #allTermsInUMLS} when it passes, so the
 * order of the checks matters.
 *
 * <p>Note that {@link #cuisWithRuleNo} is static and therefore shared by all instances,
 * whereas {@link #cuisWithTerms} and {@link #allTermsInUMLS} are per instance.
 */
public class RulesCombination {

    /** Terms per CUI, as supplied to the constructor; consulted by the per-concept uniqueness check. */
    public Map<Integer, Set<String>> cuisWithTerms;

    /** Set of terms used for homonym detection; accepted rewritten terms are added to it. */
    public Set<String> allTermsInUMLS;

    /**
     * @param cuisWithTerms  terms per CUI; presumably already normalised the same way as
     *                       {@code Rules.makeLowerCaseAndRemoveEos} does (TODO confirm with caller)
     * @param allTermsInUMLS terms used for homonym detection; this set is modified when
     *                       rewritten terms are accepted
     */
    public RulesCombination(Map<Integer, Set<String>> cuisWithTerms, Set<String> allTermsInUMLS) {
        this.cuisWithTerms = cuisWithTerms;
        this.allTermsInUMLS = allTermsInUMLS;
    }

    // Identifiers of the rewrite rules; stored on rewritten concepts via setRewriteRuleFlag.
    public static int syntacticInversionRule = 1;
    public static int possessiveRule = 2;
    public static int shortFormLongFormRule = 3;
    public static int angularBracketsRule = 4;
    public static int parenthesesWithSemanticTypeRule = 5;
    public static int leftParenthesisRule = 6;
    public static int rightParenthesisRule = 7;
    public static int leftBracketsRule = 8;
    public static int rightBracketsRule = 9;
    public static int NonEssentialParentheticalsRule = 10;
    public static int endParenthesesContainsFilteredWordRule = 11;

    /** Semantic type identifiers regarded as chemical, as provided by {@link OntologyUtilities}. */
    public static Set<Integer> chemicalSemanticTypes = OntologyUtilities.getChemicalSemanticTypes();

    /** Stop words loaded once from the {@code stopWordsMedline.txt} class resource. */
    public static Set<String> stopwordsForFiltering = getMedlineStopWordsForFiltering();

    /** CUI -> normalised rewritten term -> identifiers of the rules that produced that term. */
    public static Map<Integer, Map<String, Set<Integer>>> cuisWithRuleNo = new HashMap<Integer, Map<String, Set<Integer>>>();

    // ------------------------------------------------------------------
    // Rewrite rules
    // ------------------------------------------------------------------

    /**
     * Applies the syntactic inversion rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if the rule did not match or the
     *         rewritten term was rejected by the uniqueness checks
     */
    public CasperConcept applySyntacticInversionRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteSyntacticUniversion(concept.getTermText()),
                syntacticInversionRule);
    }

    /**
     * Applies the possessive rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyPossessiveRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewritePossessive(concept.getTermText()),
                possessiveRule);
    }

    /**
     * Splits the concept's term into a short form and a long form and returns a rewritten
     * concept for each form that passes the uniqueness checks.
     *
     * <p>Only the first pair returned by {@code Rules.findShortformLongformPattern} is used.
     * The short form is processed before the long form.
     *
     * @return zero, one or two rewritten concepts (short form first); never {@code null}
     */
    public List<CasperConcept> applyShortformLongformRule(CasperConcept concept) {
        List<CasperConcept> conceptsToReturn = new ArrayList<CasperConcept>();
        List<Pair<String, String>> shortAndLongForms = Rules.findShortformLongformPattern(concept.getTermText());
        // Guard against an empty result as well as null: get(0) would otherwise throw.
        if (shortAndLongForms == null || shortAndLongForms.isEmpty()) {
            return conceptsToReturn;
        }

        Pair<String, String> firstPair = shortAndLongForms.get(0);
        String shortForm = firstPair.object1.trim();
        String longForm = firstPair.object2.trim();

        CasperConcept shortformRewrittenConcept = rewriteIfNovel(concept, shortForm, shortFormLongFormRule);
        if (shortformRewrittenConcept != null) {
            conceptsToReturn.add(shortformRewrittenConcept);
        }
        CasperConcept longformRewrittenConcept = rewriteIfNovel(concept, longForm, shortFormLongFormRule);
        if (longformRewrittenConcept != null) {
            conceptsToReturn.add(longformRewrittenConcept);
        }
        return conceptsToReturn;
    }

    /**
     * Applies the angular brackets rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyAngluarBracketsRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteAngularBrackets(concept.getTermText()),
                angularBracketsRule);
    }

    /**
     * Applies the "parentheses with semantic type" rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applySemanticTypesRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteParenthesesWithSemanticType(concept.getTermText()),
                parenthesesWithSemanticTypeRule);
    }

    /**
     * Applies the begin-parentheses rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyLeftSideParenthesesRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteBeginParentheses(concept.getTermText()),
                leftParenthesisRule);
    }

    /**
     * Applies the end-parentheses rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyRightSideParenthesesRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteEndParentheses(concept.getTermText()),
                rightParenthesisRule);
    }

    /**
     * Applies the begin-brackets rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyLeftSideBracketsRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteBeginBrackets(concept.getTermText()),
                leftBracketsRule);
    }

    /**
     * Applies the end-brackets rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyRightSideBracketsRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteEndBrackets(concept.getTermText()),
                rightBracketsRule);
    }

    /**
     * Applies the non-essential parentheticals rewrite to the concept's term.
     *
     * @return the rewritten concept, or {@code null} if nothing new was produced
     */
    public CasperConcept applyNonEssentialParantheticalsRule(CasperConcept concept) {
        return rewriteIfNonEmpty(concept,
                Rules.findAndRewriteNonEssentialParentheticals(concept.getTermText()),
                NonEssentialParentheticalsRule);
    }

    // The "end parentheses contains filtered word" rewrite rule (rule 11) is disabled.
    // Kept as a plain line comment so it is not picked up as Javadoc of the next method.
    //
    // public CasperConcept applyEndParenthesesContainsFilteredWordRule(CasperConcept concept) {
    //     return rewriteIfNonEmpty(concept,
    //             Rules.findAndRewriteEndParenthesesContainsFilteredWordPattern(concept.getTermText()),
    //             endParenthesesContainsFilteredWordRule);
    // }

    /**
     * Treats a {@code null} or empty rewrite result as "rule did not match"; otherwise
     * delegates to {@link #rewriteIfNovel}.
     */
    private CasperConcept rewriteIfNonEmpty(CasperConcept concept, String rewrittenTermText, int rule) {
        if (rewrittenTermText == null || rewrittenTermText.equals("")) {
            return null;
        }
        return rewriteIfNovel(concept, rewrittenTermText, rule);
    }

    /**
     * Runs the three uniqueness checks and, if all pass, builds the rewritten concept and
     * records the (CUI, term, rule) combination.
     *
     * @return the new concept, or {@code null} if any check rejected the term
     */
    private CasperConcept rewriteIfNovel(CasperConcept concept, String rewrittenTermText, int rule) {
        if (!newTermUniqueForConceptCaseInsensitive(concept, rewrittenTermText)) {
            return null;
        }
        if (!newTermUniqueForConceptAndRule(concept, rewrittenTermText, rule)) {
            return null;
        }
        // Must stay last: a passing homonym check adds the term to allTermsInUMLS.
        if (!rewrittenTermIsNotCaseInsensitiveHomonym(rewrittenTermText.trim())) {
            return null;
        }
        CasperConcept rewrittenConcept = setRewriteConcept(concept, null, rewrittenTermText, rule);
        addTermToUniqueCuiTermRuleCombination(rewrittenConcept);
        return rewrittenConcept;
    }

    // ------------------------------------------------------------------
    // Suppress rules
    // ------------------------------------------------------------------

    /** @return the result of {@code Rules.findAndSuppressDosages} for the concept's term */
    public static boolean applyDosagesRule(CasperConcept concept) {
        return Rules.findAndSuppressDosages(concept.getTermText());
    }

    /** @return the result of {@code Rules.findAndSuppressAtSign} for the concept's term */
    public static boolean applyAtSignRule(CasperConcept concept) {
        return Rules.findAndSuppressAtSign(concept.getTermText());
    }

    /**
     * @return the result of {@code Rules.MartijnsFilterRule} for the concept's term,
     *         evaluated against {@link #stopwordsForFiltering}
     */
    public static boolean applyMartijnsRule(CasperConcept concept) {
        return Rules.MartijnsFilterRule(concept.getTermText(), stopwordsForFiltering);
    }

    /** @return the result of {@code Rules.findAndSuppressECnumbers} for the concept's term */
    public static boolean applyECrule(CasperConcept concept) {
        return Rules.findAndSuppressECnumbers(concept.getTermText());
    }

    /** @return the result of {@code Rules.findAndSuppressNEC} for the concept's term */
    public static boolean applyNECrule(CasperConcept concept) {
        return Rules.findAndSuppressNEC(concept.getTermText());
    }

    /** @return the result of {@code Rules.findAndSuppressNOS} for the concept's term */
    public static boolean applyNOSrule(CasperConcept concept) {
        return Rules.findAndSuppressNOS(concept.getTermText());
    }

    /** @return the result of {@code Rules.findAndSuppressMisc} for the concept's term */
    public static boolean applyMiscRule(CasperConcept concept) {
        return Rules.findAndSuppressMisc(concept.getTermText());
    }

    /** @return the result of {@code Rules.findAndSuppressWordsMoreThanFiveWords} for the concept */
    public static boolean applyNoOfWordsMoreThanFiveRule(CasperConcept concept) {
        return Rules.findAndSuppressWordsMoreThanFiveWords(concept);
    }

    // ------------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------------

    /**
     * Tells whether any semantic type of the concept is listed in {@link #chemicalSemanticTypes}.
     *
     * <p>Each semantic type id is negated before the lookup.
     * NOTE(review): presumably {@code chemicalSemanticTypes} stores negative ids while the
     * concept stores positive ones - confirm against {@code OntologyUtilities}.
     *
     * @return {@code true} if at least one negated semantic type id is contained in the set;
     *         {@code false} otherwise, including when the concept has no semantic types
     */
    public boolean conceptHasChemicalSemanticType(CasperConcept concept) {
        Set<Integer> semsForConcept = concept.getSemType();
        if (semsForConcept == null) {
            return false;
        }
        for (Integer semID : semsForConcept) {
            if (chemicalSemanticTypes.contains(-semID)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the class resource {@code stopWordsMedline.txt}, one entry per line.
     *
     * <p>Every line is added as is (no trimming, blank lines included). If an
     * {@link IOException} occurs while reading, the stack trace is printed and the lines
     * read so far are returned.
     *
     * @return a sorted set with the lines of the resource
     * @throws IllegalStateException if the resource cannot be found
     */
    public static Set<String> getMedlineStopWordsForFiltering() {
        java.io.InputStream resource = RulesCombination.class.getResourceAsStream("stopWordsMedline.txt");
        if (resource == null) {
            throw new IllegalStateException(
                    "Resource stopWordsMedline.txt not found relative to " + RulesCombination.class.getName());
        }
        Set<String> result = new TreeSet<String>();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resource));
        try {
            // readLine() returning null is the end-of-stream signal; ready() is not an EOF
            // test and could let a null line reach the TreeSet.
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                result.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                bufferedReader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only resource stream fails.
            }
        }
        return result;
    }

    /**
     * Checks that the normalised new term does not equal any term stored for the concept's
     * CUI in {@link #cuisWithTerms}.
     *
     * <p>The new term is normalised with {@code Rules.makeLowerCaseAndRemoveEos}; the stored
     * terms are compared as they are.
     *
     * @return {@code false} if an equal term is stored for the CUI; {@code true} otherwise,
     *         including when no terms are stored for the CUI at all
     */
    public boolean newTermUniqueForConceptCaseInsensitive(CasperConcept concept, String newTermText) {
        String normalisedTerm = Rules.makeLowerCaseAndRemoveEos(newTermText);
        Set<String> knownTerms = cuisWithTerms.get(concept.getCUI());
        if (knownTerms == null) {
            // No terms registered for this CUI, so the new term cannot be a duplicate.
            return true;
        }
        for (String knownTerm : knownTerms) {
            if (normalisedTerm.equals(knownTerm)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that the normalised term is not yet contained in {@link #allTermsInUMLS}.
     *
     * <p>Side effect: when the term is new it is added to {@link #allTermsInUMLS}, so a
     * second call with the same term returns {@code false}.
     *
     * @return {@code true} if the term was not present (and has now been added)
     */
    public boolean rewrittenTermIsNotCaseInsensitiveHomonym(String text) {
        String normalisedTerm = Rules.makeLowerCaseAndRemoveEos(text);
        if (allTermsInUMLS.contains(normalisedTerm)) {
            return false;
        }
        allTermsInUMLS.add(normalisedTerm);
        return true;
    }

    /**
     * Checks that the given rule has not already produced the given term for the concept's
     * CUI, according to {@link #cuisWithRuleNo}.
     *
     * @return {@code false} if this (CUI, normalised term, rule) combination is already
     *         recorded; {@code true} otherwise
     */
    public boolean newTermUniqueForConceptAndRule(CasperConcept concept, String newTermText, Integer rewriteRule) {
        Map<String, Set<Integer>> rulesByTerm = cuisWithRuleNo.get(concept.getCUI());
        if (rulesByTerm == null) {
            return true;
        }
        Set<Integer> rulesForTerm = rulesByTerm.get(Rules.makeLowerCaseAndRemoveEos(newTermText));
        return rulesForTerm == null || !rulesForTerm.contains(rewriteRule);
    }

    /**
     * Builds a new concept that carries the trimmed rewritten term and the rule id, and
     * copies CUI, SUI and semantic types from the source concept.
     *
     * @param concept           concept the rewrite was derived from
     * @param rewrittenConcept  ignored; a fresh instance is always created (parameter kept
     *                          for compatibility with existing callers)
     * @param rewrittenTermText rewritten term; trimmed before it is stored
     * @param rule              identifier of the rewrite rule that produced the term
     * @return the newly created concept
     */
    public CasperConcept setRewriteConcept(CasperConcept concept, CasperConcept rewrittenConcept, String rewrittenTermText, int rule) {
        CasperConcept result = new CasperConcept();
        result.setTermText(rewrittenTermText.trim());
        result.setCUI(concept.getCUI());
        result.setSUI(concept.getSUI());
        result.setRewriteRuleFlag(rule);
        result.setSemType(concept.getSemType());
        return result;
    }

    /**
     * Records in {@link #cuisWithRuleNo} that the concept's rewrite rule produced the
     * concept's (normalised) term for the concept's CUI.
     */
    public void addTermToUniqueCuiTermRuleCombination(CasperConcept rewrittenConcept) {
        String text = Rules.makeLowerCaseAndRemoveEos(rewrittenConcept.getTermText());

        // Get-or-create the per-CUI map.
        Map<String, Set<Integer>> rulesByTerm = cuisWithRuleNo.get(rewrittenConcept.getCUI());
        if (rulesByTerm == null) {
            rulesByTerm = new HashMap<String, Set<Integer>>();
            cuisWithRuleNo.put(rewrittenConcept.getCUI(), rulesByTerm);
        }

        // Get-or-create the per-term rule set.
        Set<Integer> rulesForTerm = rulesByTerm.get(text);
        if (rulesForTerm == null) {
            rulesForTerm = new HashSet<Integer>();
            rulesByTerm.put(text, rulesForTerm);
        }

        rulesForTerm.add(rewrittenConcept.getRewriteRuleFlag());
    }
}