package edu.stanford.nlp.parser.ensemble.utils;
import org.maltparser.core.helper.SystemLogger;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.*;
/**
 * A single token of a dependency-parsed sentence, which doubles as the
 * dependency arc going from {@link #head} to this token ({@link #id}).
 *
 * <p>Besides holding the CoNLL-X columns, the class offers static helpers to
 * read and write sentences, to merge the outputs of several models and to
 * repair sentences that have more than one root.
 *
 * <p>Instances are mutable and carry no synchronization.
 */
public class Token implements Dependency {
    /** Position of the token in its sentence (first column of a CoNLL line). */
    int id;
    /** Word form. */
    String form;
    /** Lemma; defaults to the form when none is supplied. */
    String lemma;
    /** Fine-grained part-of-speech tag (fifth CoNLL-X column). */
    String pos;
    /** Coarse-grained part-of-speech tag (fourth CoNLL-X column). */
    String cpos;
    /** Morphological features; defaults to {@code "_"}. */
    String feats;
    /** Id of the head token; {@code 0} marks a root. */
    int head;
    /** Dependency label. */
    String label;
    /** Score of this dependency; {@code Double.NEGATIVE_INFINITY} when unset. */
    double score;
    // List of models that proposed this dependency
    List<Integer> models;
    // Max number of votes received for a dependency with this modifier (useful for the isolated dependency analysis)
    int maxVotes;

    /**
     * Creates a token with no score. The lemma defaults to the form, the
     * coarse POS to {@code pos} and the features to {@code "_"}.
     *
     * @param id  position of the token in the sentence
     * @param f   word form
     * @param pos part-of-speech tag, used for both the fine and coarse tags
     * @param h   id of the head token
     * @param lbl dependency label
     */
    public Token(int id, String f, String pos, int h, String lbl) {
        this(id, f, pos, h, lbl, Double.NEGATIVE_INFINITY);
    }

    /**
     * Creates a scored token. The lemma defaults to the form, the coarse POS
     * to {@code pos} and the features to {@code "_"}.
     *
     * @param id    position of the token in the sentence
     * @param f     word form
     * @param pos   part-of-speech tag, used for both the fine and coarse tags
     * @param h     id of the head token
     * @param lbl   dependency label
     * @param score score of the dependency
     */
    public Token(int id, String f, String pos, int h, String lbl, double score) {
        construct(id, f, pos, h, lbl, score);
    }

    /**
     * Creates a fully specified token with no score.
     *
     * @param id    position of the token in the sentence
     * @param f     word form
     * @param l     lemma
     * @param cpos  coarse-grained part-of-speech tag
     * @param pos   fine-grained part-of-speech tag
     * @param feats morphological features
     * @param h     id of the head token
     * @param lbl   dependency label
     */
    public Token(int id, String f, String l, String cpos, String pos, String feats, int h, String lbl) {
        this(id, f, l, cpos, pos, feats, h, lbl, Double.NEGATIVE_INFINITY);
    }

    /**
     * Creates a fully specified, scored token.
     *
     * @param id    position of the token in the sentence
     * @param f     word form
     * @param l     lemma
     * @param cpos  coarse-grained part-of-speech tag
     * @param pos   fine-grained part-of-speech tag
     * @param feats morphological features
     * @param h     id of the head token
     * @param lbl   dependency label
     * @param score score of the dependency
     */
    public Token(int id, String f, String l, String cpos, String pos, String feats, int h, String lbl, double score) {
        // construct() stores its POS argument in both pos and cpos, so the
        // coarse tag must go in here; the fine tag is overwritten just below.
        // Passing pos instead would silently discard the caller's cpos.
        construct(id, f, cpos, h, lbl, score);
        this.lemma = l;
        this.pos = pos;
        this.feats = feats;
    }

    /**
     * Shared initializer: sets every field, using {@code f} as the lemma,
     * {@code pos} for both POS fields and {@code "_"} for the features.
     */
    private void construct(int id, String f, String pos, int h, String l, double score) {
        this.id = id;
        this.form = f;
        this.lemma = f;
        this.pos = pos;
        this.cpos = pos;
        this.feats = "_";
        this.head = h;
        this.label = l;
        this.score = score;
        this.models = null;
        this.maxVotes = 0;
    }

    /** @return the id of the head token */
    @Override
    public int head() {
        return head;
    }

    /** @return the id of this token, i.e. the modifier of the dependency */
    @Override
    public int mod() {
        return id;
    }

    /** @return the dependency label */
    @Override
    public String label() {
        return label;
    }

    /** @return the score of the dependency */
    @Override
    public double score() {
        return score;
    }

    /**
     * Replaces the score of the dependency.
     *
     * @param s new score
     */
    public void setScore(double s) {
        score = s;
    }

    /**
     * Records that model {@code m} proposed this dependency. The backing list
     * is created on first use.
     */
    void addModel(int m) {
        if (models == null) {
            models = new ArrayList<Integer>();
        }
        models.add(m);
    }

    /**
     * @return the live list of models that proposed this dependency, or
     *         {@code null} if {@link #addModel(int)} was never called
     */
    List<Integer> getModels() {
        return models;
    }

    /**
     * Prints the token in CoNLL-X format: ten tab-separated columns, with the
     * label upper-cased and the last two columns always written as {@code _}.
     */
    @Override
    public String toString() {
        StringBuilder os = new StringBuilder();
        os.append(id).append('\t')
          .append(form).append('\t')
          .append(lemma).append('\t')
          .append(cpos).append('\t')
          .append(pos).append('\t')
          .append(feats).append('\t')
          .append(head).append('\t')
          // Locale.ROOT keeps the mapping stable whatever the default locale
          // is (a Turkish locale would otherwise turn 'i' into a dotted 'İ').
          .append(label.toUpperCase(Locale.ROOT))
          .append("\t_\t_");
        return os.toString();
    }

    /**
     * Tells whether {@code other} has the same head and the same label,
     * ignoring case. The modifier is not compared.
     *
     * @param other dependency to compare with
     * @return {@code true} if head and label match
     */
    @Override
    public boolean sameDependency(Dependency other) {
        return head == other.head() && label.equalsIgnoreCase(other.label());
    }

    /**
     * Reads the next sentence from a CoNLL-X stream. Lines are consumed up to
     * and including the first blank line, or up to the end of the stream.
     * Labels go through {@link #normLabel(String)}.
     *
     * <p>Lines with fewer than eight columns or non-numeric id/head columns
     * are not handled and surface as unchecked exceptions.
     *
     * @param is reader positioned at the start of a sentence
     * @return the tokens of the sentence, or {@code null} when no token line
     *         was read (end of stream, or a blank line came first)
     * @throws IOException if reading fails
     */
    public static List<Token> readNextSentCoNLLX(BufferedReader is) throws IOException {
        List<Token> sent = new ArrayList<Token>();
        String line;
        while ((line = is.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                break;
            }
            // Runs of tabs count as one separator, so empty columns collapse.
            String[] toks = line.split("[\t]+");
            int id = Integer.parseInt(toks[0]);
            String form = toks[1];
            String lemma = toks[2];
            String cpos = toks[3];
            String pos = toks[4];
            String feats = toks[5];
            int head = Integer.parseInt(toks[6]);
            String label = normLabel(toks[7]);
            sent.add(new Token(id, form, lemma, cpos, pos, feats, head, label));
        }
        return sent.isEmpty() ? null : sent;
    }

    /**
     * Normalizes a dependency label: {@code "null"} in any case becomes
     * {@code "root"}, everything else is lower-cased.
     *
     * @param l raw label
     * @return the normalized label
     */
    public static String normLabel(String l) {
        if (l.equalsIgnoreCase("null")) {
            return "root";
        }
        // Locale.ROOT makes the result independent of the default locale.
        return l.toLowerCase(Locale.ROOT);
    }

    /**
     * Reads the next sentence from a CoNLL-2008 style stream. Only columns
     * 0 (id), 5 (form), 7 (POS), 8 (head) and 9 (label) are used; the
     * remaining Token fields take the defaults of the short constructor.
     * NOTE(review): the column meanings are taken from the variable names in
     * this method; verify them against the CoNLL-2008 specification.
     *
     * <p>Lines with fewer than ten columns or non-numeric id/head columns
     * are not handled and surface as unchecked exceptions.
     *
     * @param is reader positioned at the start of a sentence
     * @return the tokens of the sentence, or {@code null} when no token line
     *         was read (end of stream, or a blank line came first)
     * @throws IOException if reading fails
     */
    public static List<Token> readNextSentCoNLL08(BufferedReader is) throws IOException {
        List<Token> sent = new ArrayList<Token>();
        String line;
        while ((line = is.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                break;
            }
            String[] toks = line.split("[\t]+");
            int id = Integer.parseInt(toks[0]);
            String form = toks[5];
            String pos = toks[7];
            int head = Integer.parseInt(toks[8]);
            String label = normLabel(toks[9]);
            sent.add(new Token(id, form, pos, head, label));
        }
        return sent.isEmpty() ? null : sent;
    }

    /**
     * Writes a sentence in CoNLL-X format, one token per line, followed by a
     * blank line.
     *
     * @param sentence tokens to write
     * @param os       destination writer; it is neither flushed nor closed
     * @throws IOException if writing fails
     */
    public static void writeSentCoNLLX(List<Token> sentence, BufferedWriter os) throws IOException {
        for (Token tok : sentence) {
            // Token lines end with "\n" while the separator below uses the
            // platform line separator; kept as is so existing output files
            // stay byte-identical.
            os.write(tok.toString() + "\n");
        }
        os.newLine(); // blank line separate sentences
    }

    /**
     * Merges the parses that several models produced for one sentence.
     *
     * <p>For every token position the proposals are grouped with
     * {@link #sameDependency(Dependency)}. Each group is represented by the
     * token of its lowest-numbered model, which receives the indices of all
     * agreeing models through {@link #addModel(int)}. Every representative of
     * a position gets {@code maxVotes} set to the size of the largest group
     * at that position.
     *
     * <p>The input tokens are modified in place and returned, not copied. All
     * sentences must be at least as long as {@code sents[0]}.
     *
     * @param sents one parse per model, indexed by model number
     * @return the distinct dependencies, ordered by token position and then by
     *         model number; empty if {@code sents} is {@code null} or empty
     */
    public static List<Token> mergeSentences(List<Token>[] sents) {
        List<Token> uniques = new ArrayList<Token>();
        if (sents == null || sents.length == 0) {
            // Nothing to merge; avoids indexing sents[0] below.
            return uniques;
        }
        for (int mod = 0; mod < sents[0].size(); mod++) {
            // used[m] is set once model m's proposal joined an earlier group.
            boolean[] used = new boolean[sents.length];
            List<Token> chosen = new ArrayList<Token>();
            int maxVotes = 0;
            for (int model = 0; model < sents.length; model++) {
                if (used[model]) {
                    continue;
                }
                Token crt = sents[model].get(mod);
                crt.addModel(model);
                int votes = 1;
                for (int i = model + 1; i < sents.length; i++) {
                    Token other = sents[i].get(mod);
                    if (crt.sameDependency(other)) {
                        crt.addModel(i);
                        used[i] = true;
                        votes++;
                    }
                }
                maxVotes = Math.max(maxVotes, votes);
                chosen.add(crt);
            }
            for (Token u : chosen) {
                u.maxVotes = maxVotes;
                uniques.add(u);
            }
        }
        return uniques;
    }

    /**
     * POS tags that {@link #fixMultipleRoots(List)} avoids promoting to the
     * single root of a sentence.
     */
    public static final Set<String> NO_ROOT_POS =
        new HashSet<String>(Arrays.asList(".", ",", "``", "''", ":", "-LRB-", "-RRB-"));

    /**
     * Makes sure a sentence has at most one root (a token whose head is 0).
     *
     * <p>When several roots exist, the first one whose POS is not in
     * {@link #NO_ROOT_POS} is kept; if there is none, the first root is kept
     * and a debug message is logged. Every other root is re-attached to the
     * kept one. Sentences with zero or one root are left untouched.
     *
     * @param deps tokens of the sentence, modified in place
     */
    public static void fixMultipleRoots(List<Token> deps) {
        List<Token> roots = new ArrayList<Token>();
        Token realRoot = null;
        for (Token tok : deps) {
            if (tok.head != 0) {
                continue;
            }
            roots.add(tok);
            if (realRoot == null && !NO_ROOT_POS.contains(tok.pos)) {
                realRoot = tok;
            }
        }
        if (roots.size() <= 1) {
            return;
        }
        if (realRoot == null) {
            SystemLogger.logger().debug("Found no real root! Roots: " + roots);
            realRoot = roots.get(0);
        }
        for (Token w : roots) {
            if (w != realRoot) {
                w.head = realRoot.id;
            }
        }
    }
}