package rainbownlp.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import rainbownlp.core.Phrase;
import rainbownlp.parser.DependencyLine;
public class StanfordDependencyUtil {
public static Boolean haveDirectRelation(String target1, String target2,
ArrayList<DependencyLine> dependencies)
{
Boolean has_relation = false;
for (DependencyLine dep_line:dependencies)
{
// TODO: remove
// if (target1==null || target2==null)
// continue;
if ((dep_line.firstPart.equals(target1) &&
dep_line.secondPart.equals(target2))
|| (dep_line.firstPart.equals(target2) &&
dep_line.secondPart.equals(target1)))
{
has_relation =true;
break;
}
}
return has_relation;
}
public static Boolean haveDirectRelation(Phrase p1, Phrase p2,
ArrayList<DependencyLine> dependencies)
{
Boolean has_relation = false;
String target1 = p1.getNormalizedHead();
String target2 = p2.getNormalizedHead();
Integer offset1 = p1.getNormalOffset()+1;
Integer offset2 = p2.getNormalOffset()+1;
for (DependencyLine dep_line:dependencies)
{
if ((dep_line.firstPart.equals(target1) &&
dep_line.firstOffset==offset1 &&
dep_line.secondPart.equals(target2)
&& dep_line.secondOffset == offset2)
|| (dep_line.firstPart.equals(target2) &&
dep_line.firstOffset == offset2 &&
dep_line.secondOffset ==offset1 &&
dep_line.secondPart.equals(target1)))
{
has_relation =true;
break;
}
}
return has_relation;
}
public static Boolean haveDirectRelation(String target1, Integer offset1,
String target2, Integer offset2,
ArrayList<DependencyLine> dependencies)
{
Boolean has_relation = false;
for (DependencyLine dep_line:dependencies)
{
if ((dep_line.firstPart.equals(target1) &&
dep_line.firstOffset==offset1 &&
dep_line.secondPart.equals(target2)
&& dep_line.secondOffset == offset2)
|| (dep_line.firstPart.equals(target2) &&
dep_line.firstOffset == offset2 &&
dep_line.secondOffset ==offset1 &&
dep_line.secondPart.equals(target1)))
{
has_relation =true;
break;
}
}
return has_relation;
}
//this method returns all governor Dependency lines to a target
public static List<DependencyLine> getAllGovernors(
ArrayList<DependencyLine> dep_lines, String target)
{
List<DependencyLine> gov_deps = new ArrayList<DependencyLine> ();
for (DependencyLine dep: dep_lines)
{
if (dep.secondPart.equals(target))
{
gov_deps.add(dep);
}
}
return gov_deps;
}
public static List<DependencyLine> getAllGovernors(
ArrayList<DependencyLine> dep_lines, String target, int offset)
{
List<DependencyLine> gov_deps = new ArrayList<DependencyLine> ();
for (DependencyLine dep: dep_lines)
{
// if (dep.secondPart.equals(target) && dep.secondOffset==offset)
if (dep.secondOffset==offset)
{
gov_deps.add(dep);
}
}
return gov_deps;
}
public static List<DependencyLine> getAllDependents(
ArrayList<DependencyLine> dep_lines, String target, int offset)
{
List<DependencyLine> dependent_deps = new ArrayList<DependencyLine> ();
for (DependencyLine dep: dep_lines)
{
// if (dep.firstPart.equals(target) && dep.firstOffset==offset)
if (dep.firstOffset==offset)
{
dependent_deps.add(dep);
}
}
return dependent_deps;
}
public static ArrayList<DependencyLine> parseDepLinesFromString(
String dependency_string) {
ArrayList<DependencyLine> dep_lines = new ArrayList<DependencyLine>();
String[] dependencies;
dependencies = dependency_string.split("\n");
for(String dependency:dependencies)
{
DependencyLine curLine = parseDependencyLine(dependency);
dep_lines.add(curLine);
}
return dep_lines;
}
public static Boolean haveGovernorInCommon(
List<DependencyLine> target1_govs, List<DependencyLine> target2_govs) {
Boolean have_gov_in_common= false;
for (DependencyLine dep1: target1_govs)
for (DependencyLine dep2: target2_govs)
{
if(dep1.firstPart.equals(dep2.firstPart)
&& dep1.firstOffset==dep2.firstOffset)
{
have_gov_in_common= true;
return have_gov_in_common;
}
}
return have_gov_in_common;
}
//TODO : change this to consider offset also
public static Boolean areConjuncted(ArrayList<DependencyLine> dep_lines,String target1, String target2,
String connector) {
Boolean are_conjucted= false;
for (DependencyLine dep_line: dep_lines)
{
if (dep_line.relationName.equals("conj_"+connector))
{
if((dep_line.firstPart.equals(target1) && dep_line.secondPart.equals(target2))
|| (dep_line.firstPart.equals(target2) && dep_line.secondPart.equals(target1)))
{
are_conjucted = true;
break;
}
}
}
return are_conjucted;
}
public static Boolean areConjuncted(ArrayList<DependencyLine> dep_lines,
Phrase p1, Phrase p2,
String connector) {
Boolean are_conjucted= false;
String target1 = p1.getNormalizedHead();
String target2 = p2.getNormalizedHead();
Integer offset1 = p1.getNormalOffset()+1;
Integer offset2 = p2.getNormalOffset()+1;
for (DependencyLine dep_line: dep_lines)
{
if (dep_line.relationName.equals("conj_"+connector))
{
if((dep_line.firstPart.equals(target1) && dep_line.firstOffset==offset1 && dep_line.secondPart.equals(target2) && dep_line.secondOffset==offset2)
|| (dep_line.firstPart.equals(target2) && dep_line.secondPart.equals(target1)
&& dep_line.firstOffset==offset2 && dep_line.secondOffset==offset1))
{
are_conjucted = true;
break;
}
}
}
return are_conjucted;
}
public static DependencyLine parseDependencyLine(String dependency) {
DependencyLine res = new DependencyLine();
// String[] depParts = dependency.split("\\(|\\)|(, )");
Pattern p = Pattern.compile("(.+)\\((.*)\\-(\\d+)(\\D+)?, (.*)\\-(\\d+)(\\D+)?\\)");
Matcher m = p.matcher(dependency);
if(m.matches())
{
res.relationName = m.group(1);
res.firstPart = m.group(2);
res.firstOffset = Integer.parseInt(m.group(3));
res.secondPart = m.group(5);
res.secondOffset = Integer.parseInt(m.group(6));
}
return res;
}
public static Boolean areDirectlyConnected(String first_string, int first_offset,
String second_string, int second_offset, ArrayList<DependencyLine> dep_lines)
{
Boolean are_connected= false;
for(DependencyLine dep_line:dep_lines)
{
if ((((dep_line.firstPart.equals(first_string) && dep_line.firstOffset==first_offset)) &&
(dep_line.secondPart.equals(second_string) && dep_line.secondOffset== second_offset))
|| ((dep_line.firstPart.equals(second_string) && dep_line.firstOffset==second_offset ) &&
(dep_line.secondPart.equals(first_string) && dep_line.secondOffset== first_offset)))
{
are_connected =true;
break;
}
}
return are_connected;
}
// TODO:This is not checking the offset!
public static Boolean areGovernorsDirectlyConnected(String target1,String target2, ArrayList<DependencyLine> dep_lines)
{
Boolean are_connected= false;
List<DependencyLine> target1_govs = getAllGovernors(dep_lines, target1);
List<DependencyLine> target2_govs = getAllGovernors(dep_lines, target2);
for(DependencyLine target1_gov:target1_govs)
{
for(DependencyLine target2_gov:target2_govs)
{
if (areDirectlyConnected(target1_gov.firstPart, target1_gov.firstOffset,
target2_gov.firstPart, target2_gov.firstOffset, dep_lines))
{
are_connected =true;
return are_connected;
}
}
}
//check if they are directly connected
return are_connected;
}
public static HashMap<Integer, String> getTokens(String pos_tags) throws Exception
{
HashMap<Integer, String> tokensMap = new HashMap<Integer, String>();
String[] words_tags = pos_tags.split(" ");
Pattern p = Pattern.compile("(.*)\\/([^\\/]+)");
for(int i=0;i<words_tags.length;i++){
Matcher m = p.matcher(words_tags[i]);
if (m.matches())
{
String content = m.group(1);
String word = content.replaceAll("\\\\/", "/");
tokensMap.put(i+1, word);
}
else
{
throw (new Exception("the POS tag doesn't match the pattern"));
}
}
return tokensMap;
}
public static HashMap<Integer, String> getLemmaMap(String pos_tags) throws Exception
{
HashMap<Integer, String> lemmaMap = new HashMap<Integer, String>();
String[] words_tags = pos_tags.split(" ");
Pattern p = Pattern.compile("(.*)\\/([^\\/]+)");
for(int i=0;i<words_tags.length;i++){
Matcher m = p.matcher(words_tags[i]);
if (m.matches())
{
String content = m.group(1);
String word = content.replaceAll("\\\\/", "/");
lemmaMap.put(i+1, StringUtil.getWordLemma(word));
}
else
{
throw (new Exception("the POS tag doesn't match the pattern"));
}
}
return lemmaMap;
}
public static HashMap<String, String> getLemmaTokenmaps(String pos_tags) throws Exception
{
HashMap<String, String> lemma_token_map = new HashMap<String, String>();
String[] words_tags = pos_tags.split(" ");
Pattern p = Pattern.compile("(.*)\\/([^\\/]+)");
for(int i=0;i<words_tags.length;i++){
Matcher m = p.matcher(words_tags[i]);
if (m.matches())
{
String content = m.group(1);
String word = content.replaceAll("\\\\/", "/");
lemma_token_map.put(StringUtil.getWordLemma(word), word);
}
else
{
throw (new Exception("the POS tag doesn't match the pattern"));
}
}
return lemma_token_map;
}
// prep_on(admitted-19, date-23) returns this line for date
public static List<DependencyLine> getRelatedGovsInPrep(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
List<DependencyLine> prep_govs = new ArrayList<DependencyLine>();
List<DependencyLine> all_govs = getAllGovernors(dep_lines,content,offset);
for(DependencyLine gov: all_govs)
{
if (gov.relationName.startsWith("prep_"))
{
prep_govs.add(gov);
}
}
return prep_govs;
}
//Time related governers now are advmod and prep_
public static List<DependencyLine> getTimeRelatedGovs(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
List<DependencyLine> time_rel_govs = new ArrayList<DependencyLine>();
List<DependencyLine> all_govs = getAllGovernors(dep_lines,content,offset);
for(DependencyLine gov: all_govs)
{
if (gov.relationName.startsWith("prep_on")
|| gov.relationName.startsWith("prep_of")
|| gov.relationName.startsWith("prep_at")
|| gov.relationName.startsWith("prep_in"))
{
time_rel_govs.add(gov);
}
else if(gov.relationName.startsWith("advmod")
|| gov.relationName.startsWith("rel") )
{
time_rel_govs.add(gov);
}
}
return time_rel_govs;
}
// this will return the whole dep line but we know that we look for the second part
public static List<DependencyLine> getRelatedDependentsInPrep(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
List<DependencyLine> prep_dependents = new ArrayList<DependencyLine>();
List<DependencyLine> all_dependents =getAllDependents(dep_lines, content, offset);
for(DependencyLine dependent: all_dependents)
{
if (dependent.relationName.startsWith("prep_"))
{
prep_dependents.add(dependent);
}
}
return prep_dependents;
}
public static List<DependencyLine> getTimeRelatedDependents(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
List<DependencyLine> time_rel_deps = new ArrayList<DependencyLine>();
List<DependencyLine> all_deps = getAllDependents(dep_lines,content,offset);
for(DependencyLine dependent: all_deps)
{
if (dependent.relationName.startsWith("prep_on")
|| dependent.relationName.startsWith("prep_of")
|| dependent.relationName.startsWith("prep_at")
|| dependent.relationName.startsWith("prep_in"))
{
time_rel_deps.add(dependent);
}
}
return time_rel_deps;
}
public static HashMap<Integer, String> getRelatedWords(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
HashMap<Integer, String> related_words = new HashMap<Integer, String>();
List<DependencyLine> dependents = getAllDependents(dep_lines, content, offset);
for(DependencyLine dep: dependents)
{
related_words.put(dep.secondOffset, dep.secondPart);
}
List<DependencyLine> governers = getAllGovernors(dep_lines,content,offset);
for(DependencyLine gov: governers)
{
related_words.put(gov.firstOffset, gov.firstPart);
}
return related_words;
}
public static List<DependencyLine> getAllRelatedDepLines(
ArrayList<DependencyLine> dep_lines, String target, int offset)
{
List<DependencyLine> related = new ArrayList<DependencyLine> ();
for (DependencyLine dep: dep_lines)
{
if ((dep.secondPart.equals(target) && dep.secondOffset==offset)
|| (dep.firstPart.equals(target) && dep.firstOffset==offset))
{
related.add(dep);
}
}
return related;
}
// prep_on(admitted,date) returns admitted or prep_of(date,admission)
public static List<String> getAllArgsInPrep
(String content, int offset,ArrayList<DependencyLine> dep_lines)
{
List<String> args_in_prep = new ArrayList<String>();
List<DependencyLine> dependents_in_pre = StanfordDependencyUtil.getRelatedDependentsInPrep
(content, offset, dep_lines);
for (DependencyLine dependent: dependents_in_pre)
{
args_in_prep.add(dependent.secondPart);
}
List<DependencyLine> govs_in_pre = StanfordDependencyUtil.getRelatedGovsInPrep
(content, offset, dep_lines);
for(DependencyLine gov_dep : govs_in_pre)
{
args_in_prep.add(gov_dep.firstPart);
}
return args_in_prep;
}
//just returns the dep line of p1 is gov
public static DependencyLine getRelatedDependencyBetween(Phrase phrase1, Phrase Phrase2, ArrayList<DependencyLine> dep_lines)
{
DependencyLine between_dep_line= null;
String target1 = phrase1.getNormalizedHead();
String target2 = Phrase2.getNormalizedHead();
Integer offset1 = phrase1.getNormalOffset()+1;
Integer offset2 = Phrase2.getNormalOffset()+1;
for(DependencyLine dep_line:dep_lines)
{
if ((((dep_line.firstPart.equals(target1) && dep_line.firstOffset==offset1)) &&
(dep_line.secondPart.equals(target2) && dep_line.secondOffset== offset2)))
{
between_dep_line =dep_line;
break;
}
}
return between_dep_line;
}
}