/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.dataimport.UMLS; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.regex.Pattern; import org.erasmusmc.utilities.ReadTextFile; import org.erasmusmc.utilities.StringUtilities; import org.erasmusmc.utilities.WriteTextFile; import casperSoftwareCode.AssignSemanticTypeToCui; import casperSoftwareCode.CasperConcept; import casperSoftwareCode.CasperFilters; import casperSoftwareCode.ExtractCUIsAndTermsFromMRCONSO; import casperSoftwareCode.RulesCombination; public class RewriteAndSuppressUMLSusingCasper { public static boolean dosagesRule = true; public static boolean atsignRule = true; public static boolean shortTokenRule = true; public static boolean necRule = true; public static boolean nosRule = true; public static boolean ecNumbersRule = true; public static boolean miscRule = true; public static boolean wordsMoreThanFiveRule = false; String mrconsoPath;// = "/home/khettne/UMLS2008/2008AA/META/MRCONSO.RRF"; String mrstyPath;// = "/home/khettne/UMLS2008/2008AA/META/MRSTY.RRF"; String logfilePath;// = "/home/khettne/UMLS2008/UMLS_rewriting_log.log"; String ontologyPath;// = "/home/khettne/UMLS2008/MRCONSO2008AA_rewrittenAndSuppressed.RRF"; boolean suppressRules = true; boolean rewriteRules = true; boolean syntacticInvRule = true; boolean possessivesRule = true; boolean shortformlongformRule = true; boolean angularBracketsRule = true; boolean semanticTypesRule = true; boolean beginParenthesesRule = false; boolean endParenthesesRule = false; boolean beginBracketsRule = false; boolean endBracketsRule = false; public static boolean chemical = false; /** These variables are required for the non-Casper rules: */ //Patterns for suppress rules public static Pattern Retiredpattern = Pattern.compile("retired code", Pattern.CASE_INSENSITIVE); public static Pattern CurlyParenthesispattern = Pattern.compile("\\{.*\\}"); public static Pattern xxxPattern = Pattern.compile("xxx", Pattern.CASE_INSENSITIVE); public static Pattern proteinWeightPattern = Pattern.compile("^[0-9]+ ?[kK][dD][aA]?$"); public static int maxtermsize = 100; public static int mintermsize = 3; public static boolean retiredPatternRule = true; public static boolean curlyParenthesesRule = true; public static boolean xxxPatternRule = true; public static boolean proteinWeightPatternRule = true; public static boolean maxTermSize100Rule = true; public static boolean minTermSize3Rule = true; public static Set<Integer> filteredSemanticTypes = getSemanticTypesForFiltering(); public static Set<Integer> filteredSemanticTypesNotMesh = getSemanticTypesForFilteringNotMesh(); // Variables for rewrite rules public boolean nonEssentialParentheticalsRule = true; public boolean endParenthesesContainsFilteredWordRule = true; public void run(String mrconsoPath, String mrstyPath, String ontologyPath, String logFilePath) { this.mrconsoPath = mrconsoPath; this.mrstyPath = mrstyPath; this.ontologyPath = ontologyPath; this.logfilePath = logFilePath; /** Create log */ WriteTextFile logFile = new WriteTextFile(logfilePath); /** Create datatypes for homonym checks */ System.out.println("Creating datatypes for homonym checks..."); Map<Integer, Set<String>> cuisWithTerms = ExtractCUIsAndTermsFromMRCONSO.extractCuisAndTermsAsMap(mrconsoPath); Collection<Set<String>> allTermsInUMLS = cuisWithTerms.values(); Set<String> allTerms = new HashSet<String>(); for (Set<String> valueSet: allTermsInUMLS){ for(String value: valueSet){ allTerms.add(value); } } allTermsInUMLS = null; /** Add semantic types from MRSTY */ System.out.println("Adding semantic types to concepts..."); Map<Integer, Set<Integer>> conceptsWithSemTypes = AssignSemanticTypeToCui.getCuisWithSemanticTypes(mrstyPath); /** Set ontology variables*/ WriteTextFile newOntologyFile = new WriteTextFile(ontologyPath); RulesCombination rulesClass = new RulesCombination(cuisWithTerms, allTerms); int vocCol = 11; int cuiCol = 0; int suiCol = 5; int termTextCol = 14; int cui = -1; int prevCui =-1; int sui = -1; int prevSui = -1; int removedDueToBadSemTypeCount = 0; int moreThan255count = 0; int nonEnglishTermsCount = 0; int suppressableTermsCount = 0; int rewrittenTermsCount = 0; int suppressedTermsCount = 0; CasperConcept concept = null; /** Read from MRCONSO */ System.out.println("Reading from MRCONSO.RRF... "); ReadTextFile textFile = new ReadTextFile(mrconsoPath); Iterator<String> fileIterator = textFile.getIterator(); int lineCount = 0; while (fileIterator.hasNext()) { lineCount++; if (lineCount % 100000 == 0) System.out.println(lineCount+" lines processed from MRCONSO.RRF"); String line = fileIterator.next(); if (line.length() != 0) { String[] columns = line.split("\\|"); String voc = columns[vocCol].trim(); cui = Integer.parseInt(columns[cuiCol].trim().substring(1, columns[cuiCol].length())); Set<Integer> semtypes = conceptsWithSemTypes.get(cui); if (CasperFilters.notRightLanguage(columns)) { logFile.writeln("TERM NOT IN ENGLISH LANGUAGE|" + line); nonEnglishTermsCount++; } else if (CasperFilters.isMoreThan255(columns)) { logFile.writeln("TERM FIELD MORE THAN 255 CHARACTERS|" + line); moreThan255count++; } else if (CasperFilters.isSuppressable(columns)) { logFile.writeln("TERM MARKED AS SUPPRESSABLE BY NLM|" + line); suppressableTermsCount++; } else if (semanticFilter(voc, semtypes)){ logFile.writeln("TERM REMOVED DUE TO BAD SEMANTIC TYPE|" + line); removedDueToBadSemTypeCount++; } else { cui = Integer.parseInt(columns[cuiCol].trim().substring(1, columns[cuiCol].length())); sui = Integer.parseInt(columns[suiCol].trim().substring(1, columns[suiCol].length())); String term = columns[termTextCol].trim(); if (prevCui != cui) { RulesCombination.cuisWithRuleNo.clear(); } prevCui = cui; if (prevSui != sui) { concept = new CasperConcept(); concept.setCUI(cui); concept.setSUI(sui); concept.setTermText(term); concept.setSemType(conceptsWithSemTypes.get(concept.getCUI())); boolean suppressed = false; if (rulesClass.conceptHasChemicalSemanticType(concept)) chemical = true; if(suppressRules){ if(applySuppressRules(concept)){ logFile.writeln("TERM REMOVED DUE TO SUPPRESS RULE|" + line); suppressed = true; suppressedTermsCount++; } } if (rewriteRules){ if (!suppressed){ if(syntacticInvRule){ CasperConcept rewrittenConcept = rulesClass.applySyntacticInversionRule(concept); if(rewrittenConcept!=null){ if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SYN"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SYN"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(possessivesRule){ CasperConcept rewrittenConcept = rulesClass.applyPossessiveRule(concept); if(rewrittenConcept!=null){ if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+POS"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+POS"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(shortformlongformRule){ List<CasperConcept> rewrittenConceptList = rulesClass.applyShortformLongformRule(concept); if(!rewrittenConceptList.isEmpty()){ // RulesCombination.rewrittenAndSuppressed = true; for(CasperConcept rewrittenConcept: rewrittenConceptList){ if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SFLF"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SFLF"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } } if(angularBracketsRule){ CasperConcept rewrittenConcept = rulesClass.applyAngluarBracketsRule(concept); if(rewrittenConcept!=null){ if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+ANG"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+ANG"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(semanticTypesRule){ CasperConcept rewrittenConcept = rulesClass.applySemanticTypesRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SEM"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+SEM"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(beginParenthesesRule && !chemical){ CasperConcept rewrittenConcept = rulesClass.applyLeftSideParenthesesRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+BPA"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+BPA"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(endParenthesesRule && !chemical){ CasperConcept rewrittenConcept = rulesClass.applyRightSideParenthesesRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+EPA"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+EPA"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(beginBracketsRule && !chemical){ CasperConcept rewrittenConcept = rulesClass.applyLeftSideBracketsRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+BBR"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+BBR"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(endBracketsRule && !chemical){ CasperConcept rewrittenConcept = rulesClass.applyRightSideBracketsRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+EBR"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+EBR"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } if(nonEssentialParentheticalsRule){ CasperConcept rewrittenConcept = rulesClass.applyNonEssentialParantheticalsRule(concept); if(rewrittenConcept!=null){ // RulesCombination.rewrittenAndSuppressed = true; if(suppressRules){ if(!applySuppressRules(rewrittenConcept)){ rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+NON"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } }else { rewrittenTermsCount++; newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+NON"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); } } } // if(endParenthesesContainsFilteredWordRule){ // CasperConcept rewrittenConcept = rulesClass.applyEndParenthesesContainsFilteredWordRule(concept); // if(rewrittenConcept!=null){ // if(suppressRules){ // if(!applySuppressRules(rewrittenConcept)){ // rewrittenTermsCount++; // newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+DIS"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); // } // }else { // rewrittenTermsCount++; // newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"+DIS"+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+rewrittenConcept.getTermText()+"|"+columns[15]+"|"+columns[16]+"|"); // } // } // } } } if (!suppressed){ newOntologyFile.writeln(columns[0]+"|"+columns[1]+"|"+columns[2]+"|"+columns[3]+"|"+columns[4]+"|"+columns[5]+"|"+columns[6]+"|"+columns[7]+"|"+columns[8]+"|"+columns[9]+"|"+columns[10]+"|"+columns[11]+"|"+columns[12]+"|"+columns[13]+"|"+columns[14]+"|"+columns[15]+"|"+columns[16]+"|"); } prevSui = sui; chemical = false; // RulesCombination.rewrittenAndSuppressed = false; } } } } /** Save to ontologyfile and log */ System.out.println("Closing logfile and saving to new MRCONSO file: "+StringUtilities.now()); logFile.close(); newOntologyFile.close(); System.out.println(removedDueToBadSemTypeCount+ " terms were removed due to bad semantic type"); System.out.println(moreThan255count+ " terms were removed due to length > 255 characters"); System.out.println(nonEnglishTermsCount+ " non-english terms were removed"); System.out.println(suppressableTermsCount+ " terms marked as suppressable by NLM were removed"); System.out.println(suppressedTermsCount+ " terms were removed by suppress rules"); System.out.println(rewrittenTermsCount+ " terms were rewritten and added"); } public static boolean applySuppressRules(CasperConcept concept){ //Suppressrules from Casper if(dosagesRule){ if(RulesCombination.applyDosagesRule(concept)) return true; } if(atsignRule){ if(RulesCombination.applyAtSignRule(concept)) return true; } if(shortTokenRule){ if(RulesCombination.applyMartijnsRule(concept)) return true; } if(ecNumbersRule){ if(RulesCombination.applyECrule(concept)) return true; } if(necRule){ if(RulesCombination.applyNECrule(concept)) return true; } if(nosRule){ if(RulesCombination.applyNOSrule(concept)) return true; } if(miscRule){ if(RulesCombination.applyMiscRule(concept)) return true; } if(wordsMoreThanFiveRule && !chemical){ if(RulesCombination.applyNoOfWordsMoreThanFiveRule(concept)) return true; } //Other suppress rules String t = concept.getTermText(); if(maxTermSize100Rule && !chemical){ if (t.length() > maxtermsize) return true; } if(minTermSize3Rule){ if (t.length() < mintermsize) return true; } if(curlyParenthesesRule){ if (CurlyParenthesispattern.matcher(t).find() && !chemical) return true; } if(retiredPatternRule){ if (Retiredpattern.matcher(t).find()) return true; } if(xxxPatternRule){ if (xxxPattern.matcher(t).find()) return true; } if(proteinWeightPatternRule){ if (proteinWeightPattern.matcher(t).matches()) return true; } return false; } private static boolean semanticFilter(String voc, Set<Integer> semtypes) { Iterator<Integer> semIterator = semtypes.iterator(); while (semIterator.hasNext()){ Integer semID = semIterator.next(); if (filteredSemanticTypes.contains(-semID)){ return true; }else if (!voc.equals("MSH") && filteredSemanticTypesNotMesh.contains(-semID)) { return true; } } return false; } private static Set<Integer> getSemanticTypesForFiltering() { Set<Integer> result = new TreeSet<Integer>(); result.add(-71); result.add(-185); result.add(-78); result.add(-171); result.add(-122); return result; } private static Set<Integer> getSemanticTypesForFilteringNotMesh() { Set<Integer> result = new TreeSet<Integer>(); result.add(-201); result.add(-200); result.add(-170); result.add(-97); result.add(-73); result.add(-74); result.add(-203); result.add(-79); result.add(-80); result.add(-81); result.add(-82); result.add(-83); result.add(-169); result.add(-77); result.add(-92); result.add(-93); result.add(-94); return result; } }