package edu.stanford.nlp.coref.misc;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.coref.CorefAlgorithm;
import edu.stanford.nlp.coref.CorefSystem;
import edu.stanford.nlp.coref.CorefUtils;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.DocumentMaker;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;

/**
 * Class for loading coreference links from a file and then performing them on CoNLL data.
 * Each line of the file should contain a document id followed by a tab followed by a
 * space-separated list of pairs of mention ids, separated by commas, to be merged
 * (e.g., 0\t2,3 2,5 4,9).
 *
 * <p>The algorithm keeps an internal counter that starts at 0 and is incremented on every
 * call to {@link #runCoref(Document)}; the n-th call (0-based) applies the merges that were
 * recorded in the file under document id n.
 *
 * @author Kevin Clark
 */
public class FromFileCorefAlgorithm implements CorefAlgorithm {
  /** Merges to perform, keyed by the document id found in the first column of the file. */
  private final Map<Integer, List<Pair<Integer, Integer>>> toMerge = new HashMap<>();

  /** Id under which the next document passed to {@link #runCoref(Document)} is looked up. */
  private int currentDocId = 0;

  /**
   * Reads all saved links from the given file.
   *
   * @param savedLinkPath path of the file containing the saved links
   * @throws RuntimeException if the file cannot be opened or read
   * @throws NumberFormatException if a document id or mention id is not an integer
   * @throws IllegalArgumentException if a pair does not contain two comma-separated ids
   */
  public FromFileCorefAlgorithm(String savedLinkPath) {
    try (BufferedReader br = new BufferedReader(new FileReader(savedLinkPath))) {
      // readLine() reports failures as IOException, so read errors in the middle of the
      // file are wrapped by the catch below (BufferedReader.lines() would instead surface
      // them as UncheckedIOException, bypassing it).
      String line;
      while ((line = br.readLine()) != null) {
        addLinks(line);
      }
    } catch (IOException e) {
      throw new RuntimeException("Error reading saved links", e);
    }
  }

  /**
   * Parses one line of the saved-links file and records its pairs in {@link #toMerge}.
   * Blank lines and empty tokens produced by repeated spaces are ignored.
   *
   * @param line a line of the form {@code docId\tm1,m2 m3,m4 ...}
   */
  private void addLinks(String line) {
    if (line.trim().isEmpty()) {
      return;
    }
    String[] split = line.split("\t");
    int did = Integer.parseInt(split[0]);
    // Register the document even when it has no pairs, as a line may hold only an id.
    List<Pair<Integer, Integer>> docMerges = toMerge.computeIfAbsent(did, k -> new ArrayList<>());
    if (split.length > 1) {
      for (String pair : split[1].split(" ")) {
        if (pair.isEmpty()) {
          continue;
        }
        String[] ms = pair.split(",");
        if (ms.length < 2) {
          throw new IllegalArgumentException(
              "Malformed mention pair '" + pair + "' in saved links line: " + line);
        }
        docMerges.add(new Pair<>(Integer.valueOf(ms[0]), Integer.valueOf(ms[1])));
      }
    }
  }

  /**
   * Applies the merges stored for the current document id to the given document, then
   * advances the internal document counter (whether or not any merges were stored).
   *
   * @param document the document whose coreference clusters should be merged
   */
  @Override
  public void runCoref(Document document) {
    List<Pair<Integer, Integer>> merges = toMerge.get(currentDocId);
    if (merges != null) {
      for (Pair<Integer, Integer> pair : merges) {
        CorefUtils.mergeCoreferenceClusters(pair, document);
      }
    }
    currentDocId += 1;
  }

  /**
   * Runs the saved links on CoNLL data.
   *
   * @param args {@code args[0]} is the path of a properties file, {@code args[1]} is the
   *     path of the saved-links file
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception if running the coreference system fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      throw new IllegalArgumentException(
          "Usage: FromFileCorefAlgorithm <properties-file> <saved-links-file>");
    }
    Properties props = StringUtils.argsToProperties(new String[] {"-props", args[0]});
    new CorefSystem(new DocumentMaker(props, new Dictionaries(props)),
        new FromFileCorefAlgorithm(args[1]), true, false).runOnConll(props);
  }
}