///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2007 University of Texas at Austin and (C) 2005
// University of Pennsylvania and Copyright (C) 2002, 2003 University
// of Massachusetts Amherst, Department of Computer Science.
//
// This software is licensed under the terms of the Common Public
// License, Version 1.0 or (at your option) any subsequent version.
//
// The license is approved by the Open Source Initiative, and is
// available from their website at http://www.opensource.org.
///////////////////////////////////////////////////////////////////////////////
package mstparser.io;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import mstparser.DependencyInstance;
import mstparser.Util;
/**
* A reader for files in MST format.
*
* <p> Created: Sat Nov 10 15:25:10 2001 </p>
*
* @author Jason Baldridge
* @version $Id: MSTReader.java 94 2007-01-17 17:05:12Z jasonbaldridge $
* @see mstparser.io.DependencyReader
*/
public class MSTReader extends DependencyReader {

  /**
   * Reads the next sentence from the input and converts it into a {@link DependencyInstance}.
   *
   * <p>Each instance is read as consecutive tab-separated lines: forms, POS tags, dependency
   * labels (only when {@code labeled} is set), heads, confidence scores (only when
   * {@code confScores} is set), followed by one separator line that is read and discarded.
   * An artificial root token is inserted at index 0 of every returned array.
   *
   * @return the next instance, or {@code null} once the input is exhausted (the underlying
   *     reader is closed at that point)
   * @throws IOException if reading fails or the input ends in the middle of an instance
   */
  @Override
  public DependencyInstance getNext() throws IOException {
    String line = inputReader.readLine();
    if (line == null) {
      // End of input: release the reader and signal that there are no more instances.
      inputReader.close();
      return null;
    }

    String pos_line = inputReader.readLine();
    // Unlabeled input has no label line; reuse the POS line as a same-length placeholder.
    String deprel_line = labeled ? inputReader.readLine() : pos_line;
    String heads_line = inputReader.readLine();
    String conf_line = confScores ? inputReader.readLine() : "";
    inputReader.readLine(); // blank line

    if (pos_line == null || deprel_line == null || heads_line == null || conf_line == null) {
      // Fail with a descriptive error instead of a NullPointerException further down.
      throw new IOException("Unexpected end of input in the middle of an MST instance");
    }

    String[] forms = line.split("\t");
    String[] pos = pos_line.split("\t");
    String[] deprels = deprel_line.split("\t");
    int[] heads = Util.stringsToInts(heads_line.split("\t"));

    // Shift everything by one position to make room for the artificial root at index 0.
    String[] forms_new = new String[forms.length + 1];
    String[] pos_new = new String[pos.length + 1];
    String[] deprels_new = new String[deprels.length + 1];
    int[] heads_new = new int[heads.length + 1];

    forms_new[0] = "<root>";
    pos_new[0] = "<root-POS>";
    deprels_new[0] = "<no-type>";
    heads_new[0] = -1;

    for (int i = 0; i < forms.length; i++) {
      forms_new[i + 1] = normalize(forms[i], null);
      pos_new[i + 1] = pos[i];
      deprels_new[i + 1] = labeled ? deprels[i] : "<no-type>";
      heads_new[i + 1] = heads[i];
    }

    double[] confs_new = null;
    if (confScores) {
      double[] confs = Util.stringsToDoubles(conf_line.split("\t"));
      confs_new = new double[confs.length + 1];
      confs_new[0] = 1;
      // Copy exactly as many scores as were parsed; the destination is sized from confs.
      System.arraycopy(confs, 0, confs_new, 1, confs.length);
    }

    DependencyInstance instance =
        new DependencyInstance(forms_new, pos_new, deprels_new, heads_new, confs_new);

    // set up the coarse pos tags as just the first letter of the fine-grained ones
    String[] cpostags = new String[pos_new.length];
    cpostags[0] = "<root-CPOS>";
    for (int i = 1; i < pos_new.length; i++) {
      cpostags[i] = pos_new[i].substring(0, 1);
    }
    instance.cpostags = cpostags;

    // set up the lemmas as just the first 5 characters of the forms
    String[] lemmas = new String[forms_new.length];
    // The root marker belongs in lemmas, not cpostags (which is already attached to instance).
    lemmas[0] = "<root-LEMMA>";
    for (int i = 1; i < forms_new.length; i++) {
      int formLength = forms_new[i].length();
      lemmas[i] = formLength > 5 ? forms_new[i].substring(0, 5) : forms_new[i];
    }
    instance.lemmas = lemmas;

    instance.feats = new String[0][0];

    return instance;
  }

  /**
   * Determines whether the given file carries a dependency-label line per instance.
   *
   * <p>Skips the first three lines (plus one more when {@code confScores} is set) and inspects
   * the following line: if it is non-blank the first instance has one more line than an
   * unlabeled instance would, so the file is taken to be labeled.
   *
   * @param file path of the file to probe
   * @return {@code true} if the probed line exists and is not blank; {@code false} otherwise,
   *     including when the file ends before the probed line
   * @throws IOException if the file cannot be opened or read
   */
  @Override
  protected boolean fileContainsLabels(String file) throws IOException {
    String line;
    try (BufferedReader in = new BufferedReader(new FileReader(file))) {
      in.readLine();
      in.readLine();
      in.readLine();
      if (confScores) {
        in.readLine();
      }
      line = in.readLine();
    }
    // readLine() returns null at end of stream; a missing line means there is no label line.
    return line != null && line.trim().length() > 0;
  }
}