/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.peregrine.disambiguator; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import org.erasmusmc.peregrine.AbstractPeregrine; import org.erasmusmc.peregrine.ResultConcept; import org.erasmusmc.peregrine.ResultTerm; import org.erasmusmc.utilities.StringUtilities; public class NonFactRemover { public static Set<String> negationWords = createNegationWords(); public static Set<String> hypotheticalWords = createHypotheticalWords(); public static Set<String> doubtWords = createDoubtWords(); public static void removeAll(AbstractPeregrine peregrine){ removeNonFacts(peregrine, negationWords, false); removeNonFacts(peregrine, hypotheticalWords, false); //removeNonFacts(peregrine, doubtWords, true); removeDoubts(peregrine, doubtWords); } public static void removeNonFacts(AbstractPeregrine peregrine, Set<String> triggerWords, boolean deleteLeft){ int lastEOS = 0; for (int eos : peregrine.tokenizer.endOfSentence){ //has negation? int negation = -1; for (int i = lastEOS; i < eos; i++) if (triggerWords.contains(StringUtilities.firstLetterToLowerCase(peregrine.tokenizer.tokens.get(i)))){ negation = i; break; } //remove terms after negation: if (negation != -1){ int start = negation; if (deleteLeft) start = lastEOS; Iterator<ResultTerm> iterator = peregrine.resultTerms.iterator(); while(iterator.hasNext()){ ResultTerm term = iterator.next(); if (term.words[0] >= start && term.words[0] < eos){ iterator.remove(); } } } mapTerms2Concepts(peregrine); lastEOS = eos; } } public static void removeDoubts(AbstractPeregrine peregrine, Set<String> triggerWords){ for (int i = 0; i < peregrine.tokenizer.tokens.size(); i++) if (triggerWords.contains(StringUtilities.firstLetterToLowerCase(peregrine.tokenizer.tokens.get(i)))){ //triggerword found: remove and concepts directly before and after Iterator<ResultTerm> iterator = peregrine.resultTerms.iterator(); while(iterator.hasNext()){ ResultTerm term = iterator.next(); if (contains(term.words, i-1) || contains(term.words, i+1)){ iterator.remove(); } } } mapTerms2Concepts(peregrine); } private static boolean contains(int[] words, int i) { for (int word : words) if (word == i) return true; return false; } private static Set<String> createHypotheticalWords() { Set<String> result = new HashSet<String>(); result.add("can"); result.add("consistent"); result.add("could"); result.add("either"); result.add("evaluate"); result.add("favor"); result.add("likely"); result.add("may"); result.add("might"); result.add("most"); result.add("or"); result.add("possibility"); result.add("possible"); result.add("possibly"); result.add("presume"); result.add("probable"); result.add("probably"); result.add("question"); result.add("questionable"); result.add("rule"); result.add("should"); result.add("sometimes"); result.add("suggest"); result.add("suggestion"); result.add("suggestive"); result.add("suspect"); result.add("unless"); result.add("unsure"); result.add("will"); result.add("would"); return result; } private static Set<String> createDoubtWords() { Set<String> result = new HashSet<String>(); result.add("or"); return result; } //Generate resultConcepts based on resultTerms: protected static void mapTerms2Concepts(AbstractPeregrine peregrine){ //remove concepts: Set<ResultTerm> remainingTerms = new HashSet<ResultTerm>(peregrine.resultTerms); Iterator<ResultConcept> conceptIterator = peregrine.resultConcepts.iterator(); while (conceptIterator.hasNext()){ ResultConcept concept = conceptIterator.next(); Iterator<ResultTerm> termIterator = concept.terms.iterator(); while (termIterator.hasNext()){ ResultTerm term = termIterator.next(); if (!remainingTerms.contains(term)) termIterator.remove(); } if (concept.terms.size() == 0) conceptIterator.remove(); } /*peregrine.resultConcepts.clear(); Map<Integer, ResultConcept> id2concept = new TreeMap<Integer, ResultConcept>(); int conceptId; for (ResultTerm resultterm : peregrine.resultTerms){ for (int i = 0; i < resultterm.term.conceptId.size(); i++){ conceptId = resultterm.term.conceptId.get(i); ResultConcept resultconcept = id2concept.get(conceptId); if (resultconcept == null) { resultconcept = new ResultConcept(); resultconcept.conceptId = conceptId; id2concept.put(conceptId, resultconcept); peregrine.resultConcepts.add(resultconcept); } resultconcept.terms.add(resultterm); } }*/ } private static Set<String> createNegationWords() { Set<String> result = new HashSet<String>(); result.add("cannot"); result.add("no"); result.add("not"); result.add("vs"); result.add("versus"); result.add("without"); //result.add("exclude"); return result; } }