package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.util.*;
import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.CorefChain.CorefMention;
import edu.stanford.nlp.coref.hybrid.HybridCorefSystem;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.*;
public class HybridCorefAnnotator extends TextAnnotationCreator implements Annotator {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(HybridCorefAnnotator.class);
private static final boolean VERBOSE = false;
private final HybridCorefSystem corefSystem;
// for backward compatibility
private final boolean OLD_FORMAT;
public HybridCorefAnnotator(Properties props) {
try {
// Load the default properties
Properties corefProps = new Properties();
try {
corefProps.load(IOUtils.readerFromString("edu/stanford/nlp/hcoref/properties/coref-default-dep.properties"));
} catch (IOException ignored) { }
// Add passed properties
Enumeration<Object> keys = props.keys();
while (keys.hasMoreElements()) {
String key = keys.nextElement().toString();
corefProps.setProperty(key, props.getProperty(key));
}
// Create coref system
corefSystem = new HybridCorefSystem(corefProps);
OLD_FORMAT = Boolean.parseBoolean(props.getProperty("oldCorefFormat", "false"));
} catch (Exception e) {
log.error("cannot create HybridCorefAnnotator!");
e.printStackTrace();
throw new RuntimeException(e);
}
}
@Override
public void annotate(Annotation annotation){
try {
if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
log.error("this coreference resolution system requires SentencesAnnotation!");
return;
}
if (hasSpeakerAnnotations(annotation)) {
annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
}
Document corefDoc = corefSystem.docMaker.makeDocument(annotation);
Map<Integer, CorefChain> result = corefSystem.coref(corefDoc);
annotation.set(CorefCoreAnnotations.CorefChainAnnotation.class, result);
// for backward compatibility
if(OLD_FORMAT) annotateOldFormat(result, corefDoc);
} catch (RuntimeException e) {
throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static List<Pair<IntTuple, IntTuple>> getLinks(Map<Integer, CorefChain> result) {
List<Pair<IntTuple, IntTuple>> links = new ArrayList<>();
CorefChain.CorefMentionComparator comparator = new CorefChain.CorefMentionComparator();
for(CorefChain c : result.values()) {
List<CorefMention> s = c.getMentionsInTextualOrder();
for(CorefMention m1 : s){
for(CorefMention m2 : s){
if(comparator.compare(m1, m2)==1) links.add(new Pair<>(m1.position, m2.position));
}
}
}
return links;
}
private static void annotateOldFormat(Map<Integer, CorefChain> result, Document corefDoc) {
List<Pair<IntTuple, IntTuple>> links = getLinks(result);
Annotation annotation = corefDoc.annotation;
if(VERBOSE){
System.err.printf("Found %d coreference links:%n", links.size());
for(Pair<IntTuple, IntTuple> link: links){
System.err.printf("LINK (%d, %d) -> (%d, %d)%n", link.first.get(0), link.first.get(1), link.second.get(0), link.second.get(1));
}
}
//
// save the coref output as CorefGraphAnnotation
//
// this graph is stored in CorefGraphAnnotation -- the raw links found by the coref system
List<Pair<IntTuple, IntTuple>> graph = new ArrayList<>();
for(Pair<IntTuple, IntTuple> link: links){
//
// Note: all offsets in the graph start at 1 (not at 0!)
// we do this for consistency reasons, as indices for syntactic dependencies start at 1
//
int srcSent = link.first.get(0);
int srcTok = corefDoc.getOrderedMentions().get(srcSent - 1).get(link.first.get(1)-1).headIndex + 1;
int dstSent = link.second.get(0);
int dstTok = corefDoc.getOrderedMentions().get(dstSent - 1).get(link.second.get(1)-1).headIndex + 1;
IntTuple dst = new IntTuple(2);
dst.set(0, dstSent);
dst.set(1, dstTok);
IntTuple src = new IntTuple(2);
src.set(0, srcSent);
src.set(1, srcTok);
graph.add(new Pair<>(src, dst));
}
annotation.set(CorefCoreAnnotations.CorefGraphAnnotation.class, graph);
for (CorefChain corefChain : result.values()) {
if(corefChain.getMentionsInTextualOrder().size() < 2) continue;
Set<CoreLabel> coreferentTokens = Generics.newHashSet();
for (CorefMention mention : corefChain.getMentionsInTextualOrder()) {
CoreMap sentence = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(mention.sentNum - 1);
CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(mention.headIndex - 1);
coreferentTokens.add(token);
}
for (CoreLabel token : coreferentTokens) {
token.set(CorefCoreAnnotations.CorefClusterAnnotation.class, coreferentTokens);
}
}
}
private static boolean hasSpeakerAnnotations(Annotation annotation) {
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
for (CoreLabel t : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
if (t.get(CoreAnnotations.SpeakerAnnotation.class) != null) {
return true;
}
}
}
return false;
}
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
CoreAnnotations.TokensAnnotation.class,
CoreAnnotations.SentencesAnnotation.class,
SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class,
SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class,
SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class,
CorefCoreAnnotations.CorefMentionsAnnotation.class
)));
}
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
return Collections.singleton(CorefChainAnnotation.class);
}
private static Annotation testEnglish() {
String text = "Barack Obama is the president of United States. He visited California last week.";
return testAnnoation(text,new String[] {
"-props", "edu/stanford/nlp/hcoref/properties/coref-default-dep.properties"
});
}
private static Annotation testChinese(){
// String text = "中国武道太学和中国书道太学成立。新华社北京9月1日电。旨在振兴中华文化于"
// + "国际的中国武道太学和中国书道太学今天在北京成立。上述两所太学是在国家体委、"
// + "文化部、中国武术研究院、中国艺术研究院的关杯和支持下,在台湾著名企业家、书"
// + "画家、艺术品收藏家李志仁先生倡议和出资下,经国家教委和北京市成人教育局批准"
// + "而成立的。李志仁先生在台湾有“笔墨大王”之称,近几年先后出资一千万元新台币"
// + ",在中国大陆老、少、边、穷地区建立了百所小学,受到海内外人士的称赞。(完)\n";
String text = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , 米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , 可是 就 在 稍后 就 返回 莫斯科 。 这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , 因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 , 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , 强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";
return testAnnoation(text,new String[]{
"-props", "edu/stanford/nlp/hcoref/properties/zh-dcoref-default.properties"
});
}
private static Annotation testAnnoation(String text,String[] args){
Annotation document = new Annotation(text);
Properties props = StringUtils.argsToProperties(args);
StanfordCoreNLP corenlp = new StanfordCoreNLP(props);
corenlp.annotate(document);
HybridCorefAnnotator hcoref = new HybridCorefAnnotator(props);
hcoref.annotate(document);
return document;
}
public static void main(String[] args) {
// String text = "Since the implementation of the Individual Visit Scheme between Hong Kong and the mainland , more and more mainland tourists are coming to visit Hong Kong. "
// +"From the beginning up till now , more than seven million individual tourists , have come to Hong Kong. "
// +"Well , we now , er , believe more will be coming . "
// +"At this point , it has been about two years . "
// +"Also , the current number of 34 cities will be increased . "
// +"Hong Kong was developed from a fishing harbor one hundred years ago to become today 's international metropolis . "
// +"Here , eastern and western cultures have gathered , and the new and the old coexist . "
// +"When in Hong Kong , you can wander among skyscrapers , heartily enjoy shopping sprees in well - known stores and malls for goods from various countries , and taste delicious snacks from all over the world at tea shops or at street stands in Mong Kok . "
// +"You can go to burn incense and make a vow at the Repulse Bay , where all deities gather . "
// +"You can enjoy the most charming sun - filled sandy beaches in Hong Kong. "
// +"You can ascend Victoria Peak to get a panoramic view of Victoria Harbor 's beautiful scenery . "
// +"Or hop onto a trolley with over a century of history , and feel the city 's blend of the old and the modern in slow motion .";
//
Annotation document = testChinese();
System.out.println(document.get(CorefChainAnnotation.class));
log.info();
}
}