package com.google.code.negex;
/***************************************************************************************
* Author: Junebae Kye, Supervisor: Imre Solti
* Date: 06/30/2010
*
* Wendy Chapman's NegEx algorithm in Java.
*
* Sentence boundaries serve as WINDOW for negation (suggested by Wendy Chapman)
*
*
* NOTES:
* If the negation scope exists in a sentence, it will print the negation scope such as 1 - 1, 0 - 24.
* If the negation does not exist in a sentence, it will print -1
* If a pre-UMLS phrase is used as a post-UMLS phrase, for example, pain and fever denied, it will print the negation scope of, in this case, 0 - 2, for an option of yes or print -2 for an option of no
*
* Modified by Sangwon Park for OMTwitter (May 03, 2012)
*
****************************************************************************************/
import java.util.*;
public class GenNegEx {
private List<String> pseNegPhrases; // list of pseudo-negation phrases
private List<String> negPhrases; // list of negation phrases
private List<String> postNegPhrases; // list of post-negation pharses
private List<String> conjunctions; // list of conjunctions
private boolean value; // boolean for an option of yes or no
// post: constructs a GenNegEx object
// creates a list of negation phrases, pseudo-negation phrases, post-negation phrases, and conjunction
public GenNegEx(boolean value) {
pseNegPhrases = new LinkedList<String>();
negPhrases = new LinkedList<String>();
postNegPhrases = new LinkedList<String>();
conjunctions = new LinkedList<String>();
processPhrases(pseNegPhrases, negPhrases, postNegPhrases, conjunctions);
sorts(pseNegPhrases);
sorts(negPhrases);
sorts(postNegPhrases);
sorts(conjunctions);
this.value = value;
}
// post: sorts a list in descending order
private void sorts(List<String> list) {
Collections.sort(list);
Collections.reverse(list);
}
// post: returns a negation scope of an input sentence
public String negScope(String line) {
String[] s = line.split("\\s+");
return helper(s, 0);
}
// post: returns a negation scope of an input sentence
public int[] negScopeN(String line) {
String[] s = line.split("\\s+");
return helperN(s, 0);
}
// post: processes data and returns negation scope
// returns -1 if no negation phrase is found
private String helper(String[] s, int index) {
if (index < s.length)
for (int i = index; i < s.length; i++) {
int indexII = contains(s, pseNegPhrases, i, 0);
if (indexII != -1)
return helper(s, indexII);
else {
int indexIII = contains(s, negPhrases, i, 0);
if (indexIII != -1) {
int indexIV = -1;
for (int j = indexIII; j < s.length; j++) {
indexIV = contains(s, conjunctions, j, 1);
if (indexIV != -1)
break;
}
if (indexIV != -1)
return indexIII + " - " + indexIV;
else
if (indexIII > s.length - 1)
if (value)
return "0 - " + (indexIII - 2);
else
return "-2";
else
return indexIII + " - " + (s.length - 1);
} else {
int indexV = contains(s, postNegPhrases, i , 1);
if (indexV != -1)
return "0 - " + indexV;
}
}
}
return "-1";
}
// post: processes data and returns negation scope
// returns -1 if no negation phrase is found
private int[] helperN(String[] s, int index) {
if (index < s.length)
for (int i = index; i < s.length; i++) {
int indexII = contains(s, pseNegPhrases, i, 0);
if (indexII != -1)
return helperN(s, indexII);
else {
int indexIII = contains(s, negPhrases, i, 0);
if (indexIII != -1) {
int indexIV = -1;
for (int j = indexIII; j < s.length; j++) {
indexIV = contains(s, conjunctions, j, 1);
if (indexIV != -1)
break;
}
if (indexIV != -1)
return new int[] {indexIII, indexIV};
else
if (indexIII > s.length - 1)
if (value)
return new int[] {0, (indexIII - 2)};
else
return null;
else
return new int[] {indexIII, (s.length - 1)};
} else {
int indexV = contains(s, postNegPhrases, i , 1);
if (indexV != -1)
return new int[] {0, indexV};
}
}
}
return null;
}
// post: returns index of negation phrase if any negation phrase is found in a sentence
// returns -1 if no negation phrase is found
private int contains(String[] s, List<String> list, int index, int type) {
int counts = 0;
for (String token : list) {
String[] element = token.split("\\s+");
if (element.length == 1) {
if (s[index].equals(element[0]))
return index + 1;
} else
if (s.length - index >= element.length) {
String firstWord = s[index];
if (firstWord.equals(element[0])) {
counts++;
for (int i = 1; i < element.length; i++) {
if (s[index + i].equals(element[i]))
counts++;
else {
counts = 0;
break;
}
if (counts == element.length)
if (type == 0)
return index + i + 1;
else
return index;
}
}
}
}
return -1;
}
// post: saves pseudo negation phrases, negation phrases, conjunctions into the database
private void processPhrases(List<String> pseNegPhrases, List<String> negPhrases, List<String> postNegPhrases, List<String> conjunctions) {
pseNegPhrases.add("no increase");
pseNegPhrases.add("no change");
pseNegPhrases.add("no suspicious change");
pseNegPhrases.add("no significant change");
pseNegPhrases.add("no interval change");
pseNegPhrases.add("no definite change");
pseNegPhrases.add("not extend");
pseNegPhrases.add("not cause");
pseNegPhrases.add("not drain");
pseNegPhrases.add("not significant interval change");
pseNegPhrases.add("not certain if");
pseNegPhrases.add("not certain whether");
pseNegPhrases.add("gram negative");
pseNegPhrases.add("without difficulty");
pseNegPhrases.add("not necessarily");
pseNegPhrases.add("not only");
negPhrases.add("absence of");
negPhrases.add("cannot see");
negPhrases.add("cannot");
negPhrases.add("checked for");
negPhrases.add("declined");
negPhrases.add("declines");
negPhrases.add("denied");
negPhrases.add("denies");
negPhrases.add("denying");
negPhrases.add("evaluate for");
negPhrases.add("fails to reveal");
negPhrases.add("free of");
negPhrases.add("negative for");
negPhrases.add("never developed");
negPhrases.add("never had");
negPhrases.add("no");
negPhrases.add("no abnormal");
negPhrases.add("no cause of");
negPhrases.add("no complaints of");
negPhrases.add("no evidence");
negPhrases.add("no new evidence");
negPhrases.add("no other evidence");
negPhrases.add("no evidence to suggest");
negPhrases.add("no findings of");
negPhrases.add("no findings to indicate");
negPhrases.add("no mammographic evidence of");
negPhrases.add("no new");
negPhrases.add("no radiographic evidence of");
negPhrases.add("no sign of");
negPhrases.add("no significant");
negPhrases.add("no signs of");
negPhrases.add("no suggestion of");
negPhrases.add("no suspicious");
negPhrases.add("not");
negPhrases.add("not appear");
negPhrases.add("not appreciate");
negPhrases.add("not associated with");
negPhrases.add("not complain of");
negPhrases.add("not demonstrate");
negPhrases.add("not exhibit");
negPhrases.add("not feel");
negPhrases.add("not had");
negPhrases.add("not have");
negPhrases.add("not know of");
negPhrases.add("not known to have");
negPhrases.add("not reveal");
negPhrases.add("not see");
negPhrases.add("not to be");
negPhrases.add("patient was not");
negPhrases.add("rather than");
negPhrases.add("resolved");
negPhrases.add("test for");
negPhrases.add("to exclude");
negPhrases.add("unremarkable for");
negPhrases.add("with no");
negPhrases.add("without any evidence of");
negPhrases.add("without evidence");
negPhrases.add("without indication of");
negPhrases.add("without sign of");
negPhrases.add("without");
negPhrases.add("rule out for");
negPhrases.add("rule him out for");
negPhrases.add("rule her out for");
negPhrases.add("rule the patient out for");
negPhrases.add("rule him out");
negPhrases.add("rule her out");
negPhrases.add("rule out");
negPhrases.add("r/o");
negPhrases.add("ro");
negPhrases.add("rule the patient out");
negPhrases.add("rules out");
negPhrases.add("rules him out");
negPhrases.add("rules her out");
negPhrases.add("ruled the patient out for");
negPhrases.add("rules the patient out");
negPhrases.add("ruled him out against");
negPhrases.add("ruled her out against");
negPhrases.add("ruled him out");
negPhrases.add("ruled her out");
negPhrases.add("ruled out against");
negPhrases.add("ruled the patient out against");
negPhrases.add("did rule out for");
negPhrases.add("did rule out against");
negPhrases.add("did rule out");
negPhrases.add("did rule him out for");
negPhrases.add("did rule him out against");
negPhrases.add("did rule him out");
negPhrases.add("did rule her out for");
negPhrases.add("did rule her out against");
negPhrases.add("did rule her out");
negPhrases.add("did rule the patient out against");
negPhrases.add("did rule the patient out for");
negPhrases.add("did rule the patient out");
negPhrases.add("can rule out for");
negPhrases.add("can rule out against");
negPhrases.add("can rule out");
negPhrases.add("can rule him out for");
negPhrases.add("can rule him out against");
negPhrases.add("can rule him out");
negPhrases.add("can rule her out for");
negPhrases.add("can rule her out against");
negPhrases.add("can rule her out");
negPhrases.add("can rule the patient out for");
negPhrases.add("can rule the patient out against");
negPhrases.add("can rule the patient out");
negPhrases.add("adequate to rule out for");
negPhrases.add("adequate to rule out");
negPhrases.add("adequate to rule him out for");
negPhrases.add("adequate to rule him out");
negPhrases.add("adequate to rule her out for");
negPhrases.add("adequate to rule her out");
negPhrases.add("adequate to rule the patient out for");
negPhrases.add("adequate to rule the patient out against");
negPhrases.add("adequate to rule the patient out");
negPhrases.add("sufficient to rule out for");
negPhrases.add("sufficient to rule out against");
negPhrases.add("sufficient to rule out");
negPhrases.add("sufficient to rule him out for");
negPhrases.add("sufficient to rule him out against");
negPhrases.add("sufficient to rule him out");
negPhrases.add("sufficient to rule her out for");
negPhrases.add("sufficient to rule her out against");
negPhrases.add("sufficient to rule her out");
negPhrases.add("sufficient to rule the patient out for");
negPhrases.add("sufficient to rule the patient out against");
negPhrases.add("sufficient to rule the patient out");
negPhrases.add("what must be ruled out is");
negPhrases.add("ain't");
negPhrases.add("aint");
negPhrases.add("can't");
negPhrases.add("cant");
negPhrases.add("daren't");
negPhrases.add("darent");
negPhrases.add("didn't");
negPhrases.add("didnt");
negPhrases.add("doesn't");
negPhrases.add("doesnt");
negPhrases.add("don't");
negPhrases.add("dont");
negPhrases.add("hadn't");
negPhrases.add("hadnt");
negPhrases.add("hardly");
negPhrases.add("hardly");
negPhrases.add("hasn't");
negPhrases.add("hasnt");
negPhrases.add("haven't");
negPhrases.add("havent");
negPhrases.add("havn't");
negPhrases.add("havnt");
negPhrases.add("isn't");
negPhrases.add("isnt");
negPhrases.add("lack");
negPhrases.add("lacking");
negPhrases.add("lacks");
negPhrases.add("mightn't");
negPhrases.add("mightnt");
negPhrases.add("mustn't");
negPhrases.add("mustnt");
negPhrases.add("needn't");
negPhrases.add("neednt");
negPhrases.add("neither");
negPhrases.add("never");
negPhrases.add("nobody");
negPhrases.add("none");
negPhrases.add("nor");
negPhrases.add("nothing");
negPhrases.add("nowhere");
negPhrases.add("oughtn't");
negPhrases.add("oughtnt");
negPhrases.add("shan't");
negPhrases.add("shant");
negPhrases.add("shouldn't");
negPhrases.add("shouldnt");
negPhrases.add("wasn't");
negPhrases.add("wasnt");
negPhrases.add("without");
negPhrases.add("without");
negPhrases.add("wouldn't");
negPhrases.add("wouldnt");
postNegPhrases.add("should be ruled out for");
postNegPhrases.add("ought to be ruled out for");
postNegPhrases.add("may be ruled out for");
postNegPhrases.add("might be ruled out for");
postNegPhrases.add("could be ruled out for");
postNegPhrases.add("will be ruled out for");
postNegPhrases.add("can be ruled out for");
postNegPhrases.add("must be ruled out for");
postNegPhrases.add("is to be ruled out for");
postNegPhrases.add("be ruled out for");
postNegPhrases.add("unlikely");
postNegPhrases.add("free");
postNegPhrases.add("was ruled out");
postNegPhrases.add("is ruled out");
postNegPhrases.add("are ruled out");
postNegPhrases.add("have been ruled out");
postNegPhrases.add("has been ruled out");
postNegPhrases.add("being ruled out");
postNegPhrases.add("should be ruled out");
postNegPhrases.add("ought to be ruled out");
postNegPhrases.add("may be ruled out");
postNegPhrases.add("might be ruled out");
postNegPhrases.add("could be ruled out");
postNegPhrases.add("will be ruled out");
postNegPhrases.add("can be ruled out");
postNegPhrases.add("must be ruled out");
postNegPhrases.add("is to be ruled out");
postNegPhrases.add("be ruled out");
conjunctions.add("but");
conjunctions.add("however");
conjunctions.add("nevertheless");
conjunctions.add("yet");
conjunctions.add("though");
conjunctions.add("although");
conjunctions.add("still");
conjunctions.add("aside from");
conjunctions.add("except");
conjunctions.add("apart from");
conjunctions.add("secondary to");
conjunctions.add("as the cause of");
conjunctions.add("as the source of");
conjunctions.add("as the reason of");
conjunctions.add("as the etiology of");
conjunctions.add("as the origin of");
conjunctions.add("as the cause for");
conjunctions.add("as the source for");
conjunctions.add("as the reason for");
conjunctions.add("as the etiology for");
conjunctions.add("as the origin for");
conjunctions.add("as the secondary cause of");
conjunctions.add("as the secondary source of");
conjunctions.add("as the secondary reason of");
conjunctions.add("as the secondary etiology of");
conjunctions.add("as the secondary origin of");
conjunctions.add("as the secondary cause for");
conjunctions.add("as the secondary source for");
conjunctions.add("as the secondary reason for");
conjunctions.add("as the secondary etiology for");
conjunctions.add("as the secondary origin for");
conjunctions.add("as a cause of");
conjunctions.add("as a source of");
conjunctions.add("as a reason of");
conjunctions.add("as a etiology of");
conjunctions.add("as a cause for");
conjunctions.add("as a source for");
conjunctions.add("as a reason for");
conjunctions.add("as a etiology for");
conjunctions.add("as a secondary cause of");
conjunctions.add("as a secondary source of");
conjunctions.add("as a secondary reason of");
conjunctions.add("as a secondary etiology of");
conjunctions.add("as a secondary origin of");
conjunctions.add("as a secondary cause for");
conjunctions.add("as a secondary source for");
conjunctions.add("as a secondary reason for");
conjunctions.add("as a secondary etiology for");
conjunctions.add("as a secondary origin for");
conjunctions.add("cause of");
conjunctions.add("cause for");
conjunctions.add("causes of");
conjunctions.add("causes for");
conjunctions.add("source of");
conjunctions.add("source for");
conjunctions.add("sources of");
conjunctions.add("sources for");
conjunctions.add("reason of");
conjunctions.add("reason for");
conjunctions.add("reasons of");
conjunctions.add("reasons for");
conjunctions.add("etiology of");
conjunctions.add("etiology for");
conjunctions.add("trigger event for");
conjunctions.add("origin of");
conjunctions.add("origin for");
conjunctions.add("origins of");
conjunctions.add("origins for");
conjunctions.add("other possibilities of");
}
}