/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.UMLS;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.StringUtilities;
/**
 * Static helpers that decide whether a row should be filtered out, and that
 * normalize term casing, before an ontology is built.
 *
 * <p>The row-based filters receive one row as an array of column values and read
 * a fixed column index from it. NOTE(review): the indices presumably follow the
 * column layout of the UMLS term file being imported - confirm against the caller.
 * None of the filters check the array length, so a row that is shorter than the
 * index being read results in an {@link ArrayIndexOutOfBoundsException}.
 */
public class UMLSFiltersBeforeOntologyCreation {

    /** Index of the column compared against the accepted language code. */
    private static final int LANGUAGE_COLUMN = 1;

    /** Index of the column holding the source vocabulary abbreviation. */
    private static final int VOCABULARY_COLUMN = 11;

    /** Index of the column holding the term text. */
    private static final int TERM_COLUMN = 14;

    /** Index of the column holding the suppress flag. */
    private static final int SUPPRESS_COLUMN = 16;

    /** The only language code that is not filtered out by {@link #notRightLanguage}. */
    private static final String ACCEPTED_LANGUAGE = "ENG";

    /** Longest term length (in characters) that is not reported by {@link #isMoreThan255}. */
    private static final int MAX_TERM_LENGTH = 255;

    /**
     * Tells whether the row's suppress flag marks it as suppressible.
     *
     * @param columns the column values of one row; index 16 is read
     * @return {@code true} when the flag equals {@code "Y"}, {@code "E"} or {@code "O"}
     */
    public static boolean isSuppressable(String[] columns) {
        String flag = columns[SUPPRESS_COLUMN];
        return flag.equals("Y") || flag.equals("E") || flag.equals("O");
    }

    /**
     * Tells whether the row's term is longer than 255 characters.
     *
     * @param columns the column values of one row; index 14 is read
     * @return {@code true} when the term has more than 255 characters
     */
    public static boolean isMoreThan255(String[] columns) {
        return columns[TERM_COLUMN].length() > MAX_TERM_LENGTH;
    }

    /**
     * Tells whether the row is in a language other than {@code "ENG"}.
     *
     * @param columns the column values of one row; index 1 is read
     * @return {@code true} when the language column is not exactly {@code "ENG"}
     */
    public static boolean notRightLanguage(String[] columns) {
        return !columns[LANGUAGE_COLUMN].equals(ACCEPTED_LANGUAGE);
    }

    /**
     * Tells whether the row comes from a vocabulary that is excluded.
     *
     * @param columns the column values of one row; index 11 is read
     * @return {@code true} when the vocabulary is {@code "LNC"} or {@code "NCI-CTCAE"}
     */
    public static boolean isFromBadVocabulary(String[] columns) {
        String voc = columns[VOCABULARY_COLUMN];
        return voc.equals("LNC") || voc.equals("NCI-CTCAE");
    }

    /**
     * Lower-cases a term unless it is listed as an abbreviation or acronym.
     *
     * <p>A term that is not in {@code abbreviationsOrAcronyms} is lower-cased when
     * either its vocabulary is one of HCPCS, SPN, COSTAR, RXNORM, VANDF, DXP, MTHFDA
     * or MCM, or the term has more than two words and more than ten characters.
     * In every other case the term is returned unchanged.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that the result does not depend on
     * the JVM default locale (for example, a Turkish default locale would otherwise
     * turn "I" into a dotless "i").
     *
     * @param term the term to normalize
     * @param voc the abbreviation of the vocabulary the term comes from; must not be
     *        {@code null} when the term is not a listed abbreviation
     * @param abbreviationsOrAcronyms the terms that must keep their original casing;
     *        matched with {@link List#contains(Object)}, i.e. exact, case-sensitive
     * @return the lower-cased term, or {@code term} itself when no rule applies
     */
    public static String convertToLowerCaseIfWordsMoreThan2AndCharactersMoreThan10AndNotAbbreviationOrAcronym(String term, String voc, List<?> abbreviationsOrAcronyms) {
        if (abbreviationsOrAcronyms.contains(term)) {
            return term;
        }
        // The vocabulary check comes first so the word count is only computed when needed.
        if (isAlwaysLowerCasedVocabulary(voc) || termMoreThan2WordsAndLongerThan10Characters(term)) {
            return term.toLowerCase(Locale.ROOT);
        }
        return term;
    }

    /** Tells whether terms of the given vocabulary are lower-cased regardless of their length. */
    private static boolean isAlwaysLowerCasedVocabulary(String voc) {
        return voc.equals("HCPCS")
                || voc.equals("SPN")
                || voc.equals("COSTAR")
                || voc.equals("RXNORM")
                || voc.equals("VANDF")
                || voc.equals("DXP")
                || voc.equals("MTHFDA")
                || voc.equals("MCM");
    }

    /**
     * Tells whether a term has more than two words and more than ten characters.
     *
     * <p>Words are counted as the number of elements returned by
     * {@code StringUtilities.mapToWords(term)}; characters are counted with
     * {@link String#length()} on the whole term.
     *
     * @param term the term to measure
     * @return {@code true} when both thresholds are exceeded
     */
    public static boolean termMoreThan2WordsAndLongerThan10Characters(String term) {
        int wordCount = StringUtilities.mapToWords(term).size();
        return wordCount > 2 && term.length() > 10;
    }

    /**
     * Reads a list of abbreviations and acronyms from a text file.
     *
     * <p>Every non-empty line is split on {@code '|'} and the trimmed second field
     * (index 1) is added to the result, in file order and including duplicates.
     * A non-empty line that yields fewer than two fields makes the lookup of index 1
     * fail with an {@link IndexOutOfBoundsException}.
     *
     * @param filename the file to read, passed as-is to {@code ReadTextFile}
     * @return the abbreviations and acronyms found in the file
     */
    public static ArrayList<String> getAbbreviationsAndAcronyms(String filename) {
        ArrayList<String> abbreviationsOrAcronyms = new ArrayList<String>();
        // NOTE(review): ReadTextFile is never closed here - confirm it releases the file itself.
        ReadTextFile textFile = new ReadTextFile(filename);
        Iterator<String> fileIterator = textFile.getIterator();
        while (fileIterator.hasNext()) {
            String line = fileIterator.next();
            if (line.length() == 0) {
                continue;
            }
            List<String> columns = StringUtilities.safeSplit(line, '|');
            abbreviationsOrAcronyms.add(columns.get(1).trim());
        }
        return abbreviationsOrAcronyms;
    }
}