package edu.stanford.nlp.parser.dvparser;
import java.io.PrintWriter;
import java.util.List;
import java.util.TreeSet;
import org.ejml.simple.SimpleMatrix;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.metrics.Eval;
import edu.stanford.nlp.trees.Tree;
/**
 * Prints out words which are unknown to the DVParser.
 * <br>
 * Each call to one of the {@code evaluate} methods looks up every leaf of
 * the guess tree in the {@link DVModel}; words whose vector is the model's
 * unknown-word vector are reported immediately and also remembered, so
 * that {@code display} can list all of them at the end.
 * <br>
 * This does not have to be specific to the DVParser.  We could easily
 * add an interface which lets it call something to ask if the word is
 * known or not, and if not, keeps track of those words.
 *
 * @author John Bauer
 */
public class UnknownWordPrinter implements Eval {
  /** Model whose vocabulary the words are checked against. */
  final DVModel model;

  /** The model's unknown-word vector, fetched once at construction time. */
  final SimpleMatrix unk;

  /** Every unknown word seen so far; a TreeSet keeps them sorted and unique. */
  final TreeSet<String> unkWords = new TreeSet<>();

  /**
   * Creates a printer which checks words against the given model.
   *
   * @param model the model used to look up word vectors
   */
  public UnknownWordPrinter(DVModel model) {
    this.model = model;
    this.unk = model.getUnknownWordVector();
  }

  /**
   * Checks the words of {@code guess}, reporting unknown words on
   * {@code System.out}.
   *
   * @param guess the tree whose words are checked
   * @param gold not used by this evaluator
   */
  @Override
  public void evaluate(Tree guess, Tree gold) {
    // autoflush so the report shows up right away; the writer is
    // deliberately not closed, as that would close System.out
    evaluate(guess, gold, new PrintWriter(System.out, true));
  }

  /**
   * Checks the words of {@code guess}, reporting unknown words on {@code pw}.
   *
   * @param guess the tree whose words are checked
   * @param gold not used by this evaluator
   * @param pw where unknown words are reported
   */
  @Override
  public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
    evaluate(guess, gold, pw, 1.0);
  }

  /**
   * Checks the words of {@code guess}, reporting each unknown word and
   * its 1-based position on {@code pw} and remembering it for
   * {@link #display(boolean, PrintWriter)}.
   * <br>
   * If {@code guess} is null there are no words to check, so the call
   * returns without printing or recording anything.
   *
   * @param guess the tree whose words are checked; may be null
   * @param gold not used by this evaluator
   * @param pw where unknown words are reported
   * @param weight not used by this evaluator
   */
  @Override
  public void evaluate(Tree guess, Tree gold, PrintWriter pw, double weight) {
    if (guess == null) {
      // Nothing to inspect.  Without this guard guess.yield() would
      // throw a NullPointerException and abort the whole evaluation.
      return;
    }

    List<Label> words = guess.yield();
    int pos = 0;
    for (Label word : words) {
      ++pos;
      String text = word.value();
      SimpleMatrix wv = model.getWordVector(text);
      // would be faster but more implementation-specific if we
      // removed wv.equals
      if (wv == unk || wv.equals(unk)) {
        pw.printf(" Unknown word in position %d: %s%n", pos, text);
        unkWords.add(text);
      }
    }
  }

  /**
   * Prints the summary of unknown words to {@code System.out}.
   *
   * @param verbose passed through to {@link #display(boolean, PrintWriter)}
   */
  @Override
  public void display(boolean verbose) {
    display(verbose, new PrintWriter(System.out, true));
  }

  /**
   * Prints every unknown word collected so far, one per line in sorted
   * order, or a single line saying that all words were known.
   *
   * @param verbose not used; the output is the same either way
   * @param pw where the summary is written
   */
  @Override
  public void display(boolean verbose, PrintWriter pw) {
    if (unkWords.isEmpty()) {
      pw.printf("UnknownWordPrinter: all words known by DVModel%n");
    } else {
      pw.printf("UnknownWordPrinter: the following words are unknown%n");
      for (String word : unkWords) {
        pw.printf(" %s%n", word);
      }
    }
  }
}