package mstparser;
import gnu.trove.TIntArrayList;
import java.io.*;
import mstparser.io.DependencyReader;
import mstparser.io.DependencyWriter;
public class DependencyPipe {
// Feature-string -> feature-index mapping for the weight vector.
public Alphabet dataAlphabet;
// Dependency-label -> label-index mapping.
public Alphabet typeAlphabet;
private DependencyReader depReader;
private DependencyWriter depWriter;
// Label-index -> label string; filled in by closeAlphabets().
public String[] types;
public int[] typesInt;
// True when the input corpus carries dependency labels (set by the reader).
public boolean labeled = false;
// True for CONLL/CONLL2008 formats; false only for the old MST format.
private boolean isCONLL = true;
public boolean separateLab = false; // afm 06-03-08
private ParserOptions options;
/**
 * Creates a pipe configured from the given parser options: allocates the
 * feature and label alphabets and the format-specific dependency reader.
 *
 * @param options parser configuration (input format, stacking, etc.)
 * @throws IOException if the reader cannot be created
 */
public DependencyPipe(ParserOptions options) throws IOException {
    this.options = options;
    // afm 04-04-2008 --- CONLL2008 counts as a CONLL-style format too
    isCONLL = options.format.equals("CONLL") || options.format.equals("CONLL2008");
    separateLab = options.separateLab;
    dataAlphabet = new Alphabet();
    typeAlphabet = new Alphabet();
    depReader = DependencyReader.createDependencyReader(
            options.format, options.discourseMode,
            options.stackedLevel1, options.useStemmingIfLemmasAbsent);
}
/**
 * Starts reading {@code file} and records whether the corpus is labeled.
 */
public void initInputFile(String file) throws IOException {
    this.labeled = depReader.startReading(file);
}
public void initOutputFile(String file) throws IOException {
depWriter =
DependencyWriter.createDependencyWriter(options.format, labeled);
depWriter.startWriting(file);
}
/**
 * Writes one parsed instance to the output file opened by initOutputFile().
 */
public void outputInstance(DependencyInstance instance) throws IOException {
    this.depWriter.write(instance);
}
/**
 * Finishes writing, if an output file was ever opened; otherwise a no-op.
 */
public void close() throws IOException {
    if (depWriter == null) {
        return;
    }
    depWriter.finishWriting();
}
/**
 * Returns the dependency-label string for a label index
 * (valid after closeAlphabets() has populated {@code types}).
 */
public String getType(int typeIndex) {
    return this.types[typeIndex];
}
/**
 * Reads the next instance from the open reader, attaches its feature vector,
 * and encodes the gold parse as a "head|child:labelIndex" span string.
 *
 * @return the next instance, or null at end of input
 * @throws IOException on read failure
 */
protected final DependencyInstance nextInstance() throws IOException {
    DependencyInstance instance = depReader.getNext();
    if (instance == null || instance.forms == null) {
        return null;
    }
    instance.setFeatureVector(createFeatureVector(instance));
    String[] labs = instance.deprels;
    int[] heads = instance.heads;
    StringBuilder spans = new StringBuilder(heads.length * 5);
    for (int i = 1; i < heads.length; i++) {
        spans.append(heads[i]).append("|").append(i).append(":").append(typeAlphabet.lookupIndex(labs[i])).append(" ");
    }
    // Fix: a root-only instance (heads.length == 1) leaves the builder empty,
    // and substring(0, -1) would throw StringIndexOutOfBoundsException.
    instance.actParseTree =
            spans.length() > 0 ? spans.substring(0, spans.length() - 1) : "";
    return instance;
}
// afm 04-15-2008
/**
 * Prints feature counts and L1 norms of the weight vector, broken down into
 * stacked ("STK_") and word+feat ("FF"/"LF") feature subsets.
 *
 * @param params trained parameters, or null to report all-zero norms
 */
public void printModelStats(Parameters params) {
    double norm1total = 0.0;
    int num_stacked = 0;
    double norm1stacked = 0.0;
    int num_wordfeat = 0;
    double norm1wordfeat = 0.0;
    DependencyParser.out.println("No. Features: " + dataAlphabet.numEntries);
    Object[] keys = dataAlphabet.toArray();
    for (int i = 0; i < keys.length; i++) {
        int num = dataAlphabet.lookupIndex(keys[i]);
        String feat = (String) keys[i];
        double val = params != null ? params.parameters[num] : 0.0;
        //DependencyParser.out.println(feat+" = "+val);
        // Bug fix: the original did "val -= val" for negative weights, which
        // always yields 0 and so dropped every negative weight from the L1
        // norms. The intent is the absolute value.
        val = Math.abs(val);
        norm1total += val;
        if (feat.startsWith("STK_")) // Stacked feature!
        {
            num_stacked++;
            norm1stacked += val;
        } else if (feat.startsWith("FF") || feat.startsWith("LF")) {
            num_wordfeat++;
            norm1wordfeat += val;
        }
    }
    DependencyParser.out.println("No. Stacked Features: " + num_stacked);
    DependencyParser.out.println("No. Word+Feat Features: " + num_wordfeat);
    DependencyParser.out.println("L1 norm of weight vector: " + norm1total);
    DependencyParser.out.println("L1 norm of weight stacked subvector: " + norm1stacked);
    DependencyParser.out.println("L1 norm of weight word+feat subvector: " + norm1wordfeat);
}
/**
 * Builds the alphabets from {@code file}, then re-reads it, attaching a
 * feature vector and gold-parse span string to every instance. Optionally
 * serializes each instance to {@code featFileName} (the "forest" file).
 *
 * @param file         training corpus path
 * @param featFileName destination for serialized instances (used only when
 *                     options.createForest is set)
 * @return the length of each instance, in corpus order
 * @throws IOException on read/write failure
 */
public int[] createInstances(String file,
        File featFileName) throws IOException {
    createAlphabet(file);
    DependencyParser.out.println("Num Features: " + dataAlphabet.size());
    if (options.separateLab) {
        printModelStats(null);
    }
    labeled = depReader.startReading(file);
    TIntArrayList lengths = new TIntArrayList();
    ObjectOutputStream out = options.createForest
            ? new ObjectOutputStream(new FileOutputStream(featFileName))
            : null;
    // try/finally guarantees the forest stream is closed even if feature
    // extraction or serialization throws mid-corpus (was leaked before).
    try {
        DependencyInstance instance = depReader.getNext();
        int num1 = 0;
        DependencyParser.out.println("Creating Feature Vector Instances: ");
        while (instance != null) {
            DependencyParser.out.print(num1 + " ");
            instance.setFeatureVector(createFeatureVector(instance));
            String[] labs = instance.deprels;
            int[] heads = instance.heads;
            StringBuilder spans = new StringBuilder(heads.length * 5);
            for (int i = 1; i < heads.length; i++) {
                spans.append(heads[i]).append("|").append(i).append(":").append(typeAlphabet.lookupIndex(labs[i])).append(" ");
            }
            // Fix: guard against a root-only instance, where the builder is
            // empty and substring(0, -1) would throw.
            instance.actParseTree =
                    spans.length() > 0 ? spans.substring(0, spans.length() - 1) : "";
            lengths.add(instance.length());
            if (out != null) {
                writeInstance(instance, out);
            }
            instance = depReader.getNext();
            num1++;
        }
        DependencyParser.out.println();
        closeAlphabets();
    } finally {
        if (out != null) {
            out.close();
        }
    }
    return lengths.toNativeArray();
}
/**
 * First pass over {@code file}: registers every dependency label in the
 * type alphabet and every extracted feature in the data alphabet, then
 * freezes both alphabets.
 */
private void createAlphabet(String file) throws IOException {
    DependencyParser.out.print("Creating Alphabet ... ");
    labeled = depReader.startReading(file);
    for (DependencyInstance inst = depReader.getNext();
            inst != null;
            inst = depReader.getNext()) {
        for (String deprel : inst.deprels) {
            typeAlphabet.lookupIndex(deprel);
        }
        // Side effect only: populates dataAlphabet; the vector is discarded.
        createFeatureVector(inst);
    }
    closeAlphabets();
    DependencyParser.out.println("Done.");
}
/**
 * Freezes both alphabets and builds the reverse (index -> label) lookup
 * table {@code types}. Also publishes the root-label index to the forest.
 */
public void closeAlphabets() {
    dataAlphabet.stopGrowth();
    typeAlphabet.stopGrowth();
    Object[] labelKeys = typeAlphabet.toArray();
    types = new String[typeAlphabet.size()];
    for (Object key : labelKeys) {
        types[typeAlphabet.lookupIndex(key)] = (String) key;
    }
    KBestParseForest.rootType = typeAlphabet.lookupIndex("<root-type>");
}
/**
 * Adds a feature with the default value 1.0; see {@link #add(String, double, FeatureVector)}.
 */
public final void add(String feat, FeatureVector fv) {
    add(feat, 1.0, fv);
}
/**
 * Adds feature {@code feat} with value {@code val} to {@code fv}. Unknown
 * features (index &lt; 0, after alphabet growth has stopped) are dropped.
 */
public final void add(String feat, double val, FeatureVector fv) {
    int index = dataAlphabet.lookupIndex(feat);
    if (index < 0) {
        return;
    }
    fv.add(index, val);
}
/**
 * Extracts the full feature vector for an instance: core (and, when labeled
 * and not separated, labeled) features for every head-child arc, plus any
 * subclass-supplied extended features.
 */
public FeatureVector createFeatureVector(DependencyInstance instance) {
    final int len = instance.length();
    String[] labs = instance.deprels;
    int[] heads = instance.heads;
    FeatureVector fv = new FeatureVector();
    for (int child = 0; child < len; child++) {
        int head = heads[child];
        if (head == -1) {
            continue;
        }
        int small = Math.min(child, head);
        int large = Math.max(child, head);
        // Right attachment: head precedes the child.
        boolean attR = child >= head;
        addCoreFeatures(instance, small, large, attR, fv);
        if (labeled && !separateLab) { // afm 06-03-08
            addLabeledFeatures(instance, child, labs[child], attR, true, fv);
            addLabeledFeatures(instance, head, labs[child], attR, false, fv);
        }
    }
    addExtendedFeatures(instance, fv);
    return fv;
}
/**
 * Extension hook called once per instance at the end of
 * createFeatureVector(); intentionally a no-op here so subclasses can add
 * instance-level features.
 */
protected void addExtendedFeatures(DependencyInstance instance,
        FeatureVector fv) {
}
/**
 * Adds the first-order (unlabeled) features for the arc between positions
 * {@code small} and {@code large}. The feature strings built here define
 * the model's feature space, so their exact text must not change.
 *
 * @param small index of the left endpoint of the arc
 * @param large index of the right endpoint of the arc
 * @param attR  true if the head is on the left (right attachment)
 */
public void addCoreFeatures(DependencyInstance instance,
        int small,
        int large,
        boolean attR,
        FeatureVector fv) {
    String[] forms = instance.forms;
    String[] pos = instance.postags;
    String[] posA = instance.cpostags;
    String att = attR ? "RA" : "LA";
    int dist = Math.abs(large - small);
    // Bucketed distance: exact up to 5, then "5", then "10".
    String distBool;
    if (dist > 10) {
        distBool = "10";
    } else if (dist > 5) {
        distBool = "5";
    } else {
        distBool = Integer.toString(dist - 1);
    }
    String attDist = "&" + att + "&" + distBool;
    addLinearFeatures("POS", pos, small, large, attDist, fv);
    addLinearFeatures("CPOS", posA, small, large, attDist, fv);
    //////////////////////////////////////////////////////////////////////
    // Recover head/child order from the attachment direction.
    int headIndex = small;
    int childIndex = large;
    if (!attR) {
        headIndex = large;
        childIndex = small;
    }
    addTwoObsFeatures("HC", forms[headIndex], pos[headIndex],
            forms[childIndex], pos[childIndex], attDist, fv);
    // afm 06-03-2008 --- McDonald's ACL08 code adds also features for 3-size prefixes and suffixes of forms
    if (isCONLL) {
        // CONLL formats: also combine coarse POS and lemmas with the forms.
        addTwoObsFeatures("HCA", forms[headIndex], posA[headIndex],
                forms[childIndex], posA[childIndex], attDist, fv);
        addTwoObsFeatures("HCC", instance.lemmas[headIndex], pos[headIndex],
                instance.lemmas[childIndex], pos[childIndex],
                attDist, fv);
        addTwoObsFeatures("HCD", instance.lemmas[headIndex], posA[headIndex],
                instance.lemmas[childIndex], posA[childIndex],
                attDist, fv);
        if (options.discourseMode) {
            // Note: The features invoked here are designed for
            // discourse parsing (as opposed to sentential
            // parsing). It is conceivable that they could help for
            // sentential parsing, but current testing indicates that
            // they hurt sentential parsing performance.
            addDiscourseFeatures(instance, small, large,
                    headIndex, childIndex,
                    attDist, fv);
        } else {
            // Add in features from the feature lists. It assumes
            // the feature lists can have different lengths for
            // each item. For example, nouns might have a
            // different number of morphological features than
            // verbs.
            // The block below is disabled via the /* ... /**/ comment
            // toggle (adding a '/' before the opening marker enables it).
            /*///////////////////////////////////////////////////////////////
            // Agreement Feature
            boolean headAttsMatched [] = new boolean[instance.feats[headIndex].length];
            boolean depAttsMatched [] = new boolean[instance.feats[childIndex].length];
            String hPOS = posA[headIndex]; // grab head CPOS
            String dPOS = posA[childIndex]; // grab dep CPOS
            for (int i=0; i<instance.feats[headIndex].length; i++) { // for each head attr
            for (int j=0; j<instance.feats[childIndex].length; j++) { // for each dep attr
            String headItem = instance.feats[headIndex][i]; // "item": attr=val
            String depItem = instance.feats[childIndex][j];
            if (headItem.contains("=") && depItem.contains("=")) { // if not "_"
            String headAtt = instance.feats[headIndex][i].split("=")[0];
            String depAtt = instance.feats[childIndex][j].split("=")[0];
            String headVal = instance.feats[headIndex][i].split("=")[1];
            String depVal = instance.feats[childIndex][j].split("=")[1];
            if (depAtt.equals(headAtt)) { // if same attr
            headAttsMatched[i] = true ; // found a match for this attr
            depAttsMatched[j] = true ;
            if (depVal.equals(headVal)) // if same value, add "agrees"
            add(headAtt+"_agrees , head ="+hPOS+",dep ="+dPOS, fv);
            else // if different, add "disagrees"
            add(headAtt+"_disagrees , head ="+hPOS+",dep ="+dPOS, fv);
            }
            }
            }
            }
            for (int i=0; i<headAttsMatched.length; i++) // for each head attr
            if (!headAttsMatched[i]) { // if unmatched
            String headItem = instance.feats[headIndex][i]; // add asymmetric
            add("head_"+headItem+",head ="+hPOS+",dep ="+dPOS, fv);
            }
            for ( int i=0; i<depAttsMatched.length; i++) // for each dep attr
            if (!depAttsMatched[i]) { // if unmatched
            String depItem = instance.feats[childIndex][i]; // add asymmetric
            add("dep_"+depItem+",head ="+hPOS+",dep ="+dPOS, fv);
            }
            /**///////////////////////////////////////////////////////////////
            // Cross-product of head and child morphological features.
            for (int i = 0; i < instance.feats[headIndex].length; i++) {
                for (int j = 0; j < instance.feats[childIndex].length; j++) {
                    // afm 06-12-08 --- This lead to an explosion of (irrelevant) features
                    // To do something more similar to McDonald ACL'08, replace the two calls below by:
                    if (options.composeFeaturesWithPOS) {
                        addTwoObsFeatures("POSFEAT" + i + "*" + j,
                                instance.postags[headIndex],
                                instance.feats[headIndex][i],
                                instance.postags[childIndex],
                                instance.feats[childIndex][j],
                                attDist, fv);
                    } else {
                        addTwoObsFeatures("FF" + i + "*" + j,
                                instance.forms[headIndex],
                                instance.feats[headIndex][i],
                                instance.forms[childIndex],
                                instance.feats[childIndex][j],
                                attDist, fv);
                        addTwoObsFeatures("LF" + i + "*" + j,
                                instance.lemmas[headIndex],
                                instance.feats[headIndex][i],
                                instance.lemmas[childIndex],
                                instance.feats[childIndex][j],
                                attDist, fv);
                    }
                }
            }
        }
        if (instance.stacked) // afm 03-10-08
        {
            // Level-1 predictions are available: add stacking features.
            addCoreStackedFeatures(instance, headIndex, childIndex, attDist, fv);
        }
    } else {
        // We are using the old MST format. Pick up stem features
        // the way they used to be done. This is kept for
        // replicability of results for old versions.
        int hL = forms[headIndex].length();
        int cL = forms[childIndex].length();
        if (hL > 5 || cL > 5) {
            addOldMSTStemFeatures(instance.lemmas[headIndex],
                    pos[headIndex],
                    instance.lemmas[childIndex],
                    pos[childIndex],
                    attDist, hL, cL, fv);
        }
    }
}
/**
 * Adds stacking features derived from the level-1 (predicted) parse stored
 * on the instance: whether this arc was predicted, the predicted head of
 * the child, the predicted children of the head, predicted valency, and
 * predicted sibling/grandparent context. The exact feature-string text
 * defines the model's feature space and must not change.
 *
 * @param headIndex  candidate head position (-1 aborts immediately)
 * @param childIndex candidate child position
 * @param attDist    attachment-direction-and-distance suffix from addCoreFeatures
 */
private void addCoreStackedFeatures(DependencyInstance instance,
        int headIndex,
        int childIndex,
        String attDist,
        FeatureVector fv) {
    final int instanceLength = instance.length();
    String[] labs_pred = instance.deprels_pred;
    int[] heads_pred = instance.heads_pred;
    String[] pos = instance.postags; // or cpostags?
    String[] lemmas = instance.lemmas;
    String[] forms = instance.forms;
    int index;
    int j;
    String pos_index, lab_index, lemma_index, form_index;
    if (headIndex == -1) {
        return;
    }
    boolean attR = childIndex < headIndex ? false : true;
    // True when level 1 predicted exactly this head for this child.
    boolean isPredEdge = (heads_pred[childIndex] == headIndex);
    boolean use_lemmas = true;
    boolean use_forms = true;
    // --- Predicted-edge indicator features ---
    if (options.stackedFeats.usePredEdge) {
        add("STK_EDGE" + "=" + isPredEdge, fv); // Is predicted edge?
        add("STK_EDGE_POS" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex], fv); // afm 06-07-08
        if (isPredEdge) {
            if (options.stackedFeats.useLabels) {
                add("STK_EDGE_LBL" + "=" + labs_pred[childIndex], fv); // afm 03-14-08 --- Label of predicted edge
                add("STK_EDGE_LBL_POS" + "=" + labs_pred[childIndex] + " " + pos[childIndex] + " " + pos[headIndex], fv); // afm 06-07-08
            }
        }
    }
    // afm 04-03-2008 --- Predicted head for this child, if this edge was not predicted
    if (options.stackedFeats.usePredHead) {
        if (!isPredEdge) {
            pos_index = null;
            lemma_index = null; // To be used later
            form_index = null; // To be used later
            if (heads_pred[childIndex] >= 0) {
                pos_index = pos[heads_pred[childIndex]];
                lemma_index = lemmas[heads_pred[childIndex]];
                form_index = forms[heads_pred[childIndex]];
            }
            // NOTE(review): if heads_pred[childIndex] < 0 the nulls above are
            // concatenated into the feature strings as "null" — presumably
            // intentional (acts as a distinct token).
            lab_index = labs_pred[childIndex];
            // Head pos, predicted head lemma and pos
            add("STK_HEAD" + "_HL" + "=" + pos[childIndex] + " " + " " + pos[headIndex] + " " + lemma_index + " " + pos_index + "*" + attDist, fv);
            add("STK_HEAD" + "_HL" + "=" + pos[childIndex] + " " + " " + pos[headIndex] + " " + lemma_index + " " + pos_index, fv);
            if (options.stackedFeats.useLabels) {
                add("STK_HEAD_LBL" + "_HL" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + lemma_index + " " + pos_index + " " + lab_index + "*" + attDist, fv);
                add("STK_HEAD_LBL" + "_HL" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + lemma_index + " " + pos_index + " " + lab_index, fv);
            }
            if (use_forms) {
                // Head pos, predicted head form and pos
                add("STK_HEAD" + "_HF" + "=" + pos[childIndex] + " " + " " + pos[headIndex] + " " + form_index + " " + pos_index + "*" + attDist, fv);
                add("STK_HEAD" + "_HF" + "=" + pos[childIndex] + " " + " " + pos[headIndex] + " " + form_index + " " + pos_index, fv);
                if (options.stackedFeats.useLabels) {
                    add("STK_HEAD_LBL" + "_HF" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + form_index + " " + pos_index + " " + lab_index + "*" + attDist, fv);
                    add("STK_HEAD_LBL" + "_HF" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + form_index + " " + pos_index + " " + lab_index, fv);
                }
            }
            // Head pos, predicted head pos
            add("STK_HEAD" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist, fv);
            add("STK_HEAD" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + pos_index, fv);
            if (options.stackedFeats.useLabels) {
                add("STK_HEAD_LBL" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + " " + lab_index + "*" + attDist, fv);
                add("STK_HEAD_LBL" + "=" + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + " " + lab_index, fv);
            }
        }
    }
    // afm 03-27-2008 --- All predicted children (not using labels so far --- it could help)
    if (options.stackedFeats.useAllChildren) {
        String featname;
        String allchildren = "";
        String allchildren_labs = "";
        // Build a left-to-right signature of the head's predicted children,
        // with [H]/[C] markers for the head and candidate-child positions.
        for (j = 0; j < instanceLength; j++) {
            if (headIndex == heads_pred[j]) {
                if (j == childIndex) {
                    allchildren += "[[C]]" + " "; // This means that the child was predicted at this position
                    // NOTE(review): "[[C]]]" below has an extra ']' vs the line
                    // above — possibly a typo, but it is runtime feature text
                    // and is kept as-is for model compatibility.
                    allchildren_labs += "[[C]]]" + " ";
                } else {
                    allchildren += pos[j] + " ";
                    allchildren_labs += labs_pred[j] + " ";
                }
            } else if (j == headIndex) {
                allchildren += "[H]" + " ";
                allchildren_labs += "[H]" + " ";
            } else if (j == childIndex) {
                allchildren += "[C]" + " ";
                allchildren_labs += "[C]" + " ";
            }
        }
        // afm 06-13-2008 --- with the head lemma
        featname = "STK_ALLCHILD_HL_" + isPredEdge + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren;
        add(featname, fv);
        if (options.stackedFeats.useLabels) {
            featname = "STK_ALLCHILD_LBL_HL_" + isPredEdge + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren_labs;
            add(featname, fv);
        }
        if (use_forms) {
            // afm 06-13-2008 --- with the head form
            featname = "STK_ALLCHILD_HF_" + isPredEdge + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren;
            add(featname, fv);
            if (options.stackedFeats.useLabels) {
                featname = "STK_ALLCHILD_LBL_HF_" + isPredEdge + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren_labs;
                add(featname, fv);
            }
        }
        // afm 06-14-2008 --- smoothed version, without the head lemma
        featname = "STK_ALLCHILD_" + isPredEdge + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren;
        add(featname, fv);
        if (options.stackedFeats.useLabels) {
            featname = "STK_ALLCHILD_LBL_" + isPredEdge + " " + pos[headIndex] + " " + pos[childIndex] + " " + allchildren_labs;
            add(featname, fv);
        }
    }
    int grandp = heads_pred[headIndex]; // Predicted grandparent
    // Predicted valency = number of children level 1 assigned to this head.
    int valency = 0;
    for (int i = 0; i < instanceLength; i++) {
        if (heads_pred[i] == headIndex) {
            valency++;
        }
    }
    if (options.stackedFeats.useValency) {
        // afm 06-13-2008 --- +isPredEdge
        add("STK_VAL_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
        add("STK_VAL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
        add("STK_VAL_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
        add("STK_VAL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
        if (use_forms) {
            add("STK_VAL_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
            add("STK_VAL_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
        }
    }
    boolean isSiblMid = false; // Sibling in the middle between (candidate) head and modifier.
    String prefix;
    // Get previous and next (predicted) siblings
    // Note: for next sibling, isSiblMid is always false.
    for (int t = 0; t <= 2; t++) // t = 0 means previous sibling (in the direction head -> modifier); t = 1 means next
    {
        if (t == 0) {
            prefix = "STK_PRVSBL";
        } else if (t == 1) {
            prefix = "STK_NXTSBL";
        } else {
            prefix = "STK_GRANDP";
        }
        if (t == 0 && options.stackedFeats.usePrevSibl == false) {
            continue;
        }
        if (t == 1 && options.stackedFeats.useNextSibl == false) {
            continue;
        }
        if (t == 2 && options.stackedFeats.useGrandparents == false) {
            continue;
        }
        // Locate the context token: a predicted sibling on the appropriate
        // side of the child (t = 0/1) or the predicted grandparent (t = 2).
        if ((t == 0 && attR) || // prev: head, sibl, modif, or sibl, head, modif
                t == 1 && !attR) // next: sibl, modif, head
        {
            for (j = childIndex - 1; j >= 0; j--) {
                if (headIndex == heads_pred[j]) {
                    break;
                }
            }
            if (j >= 0) {
                index = j;
                if (index > headIndex) {
                    isSiblMid = true;
                } else {
                    isSiblMid = false;
                }
            } else {
                index = -1;
            }
        } else if (t != 2) // prev: modif, sibl, head, or modif, head, sibl
        // next: head, modif, sibl
        {
            for (j = childIndex + 1; j < instanceLength; j++) {
                if (headIndex == heads_pred[j]) {
                    break;
                }
            }
            if (j < instanceLength) {
                index = j;
                if (index < headIndex) {
                    isSiblMid = true;
                } else {
                    isSiblMid = false;
                }
            } else {
                index = -1;
            }
        } else {
            index = grandp;
        }
        if (index < 0) {
            pos_index = "null";
            lab_index = "null";
        } else {
            pos_index = pos[index];
            lab_index = (t == 2) ? labs_pred[headIndex] : labs_pred[index]; // afm 06-13-2008
        }
        // Write features:
        if (options.stackedFeats.useValency) {
            // afm 06-13-2008 --- +isPredEdge
            add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
            add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
            if (use_forms) {
                add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "$" + valency, fv); // afm 06-14-08 -- Predicted valency
                add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + "$" + valency, fv); // afm 03-15-08 -- Predicted valency
            }
        }
        // Includes the head lemma and POS:
        if (use_lemmas) {
            if (t == 0) {
                // afm 06-13-2008 --- +isPredEdge
                add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "#" + isSiblMid, fv);
                add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + "#" + isSiblMid, fv);
                if (options.stackedFeats.useLabels) {
                    add(prefix + "_HL_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                    add(prefix + "_HL_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                }
            }
            // afm 06-13-2008 --- +isPredEdge
            add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist, fv);
            add(prefix + "_HL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index, fv);
            if (options.stackedFeats.useLabels) {
                add(prefix + "_HL_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist, fv); // afm 03-14-08 --- Sibling label
                add(prefix + "_HL_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + lemmas[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index, fv); // afm 03-14-08 --- Sibling label
            }
        }
        // afm 06-14-2008 --- Includes the head form and POS:
        if (use_forms) {
            if (t == 0) {
                // afm 06-13-2008 --- +isPredEdge
                add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "#" + isSiblMid, fv);
                add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + "#" + isSiblMid, fv);
                if (options.stackedFeats.useLabels) {
                    add(prefix + "_HF_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                    add(prefix + "_HF_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                }
            }
            // afm 06-13-2008 --- +isPredEdge
            add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist, fv);
            add(prefix + "_HF" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index, fv);
            if (options.stackedFeats.useLabels) {
                add(prefix + "_HF_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist, fv); // afm 03-14-08 --- Sibling label
                add(prefix + "_HF_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + forms[headIndex] + " " + pos[headIndex] + " " + pos_index + lab_index, fv); // afm 03-14-08 --- Sibling label
            }
        }
        // Includes the head POS:
        if (t == 0) // For t == 1, the isSiblMid feature is always false, so it's useless
        {
            // afm 06-13-2008 --- +isPredEdge
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist + "#" + isSiblMid, fv);
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + "*" + attDist + "#" + isSiblMid, fv);
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "#" + isSiblMid, fv);
            add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + "#" + isSiblMid, fv);
            if (options.stackedFeats.useLabels) {
                add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + lab_index + "*" + attDist + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
                add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + lab_index + "#" + isSiblMid, fv); // afm 03-14-08 --- Sibling label
            }
        }
        // afm 06-13-2008 --- +isPredEdge
        add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + "*" + attDist, fv);
        add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + "*" + attDist, fv);
        add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index, fv);
        add(prefix + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index, fv);
        if (options.stackedFeats.useLabels) {
            add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + lab_index + "*" + attDist, fv); // afm 03-14-08 --- Sibling label
            add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + lab_index + "*" + attDist, fv); // afm 03-14-08 --- Sibling label
            add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos[headIndex] + " " + pos_index + lab_index, fv); // afm 03-14-08 --- Sibling label
            add(prefix + "_LBL" + "=" + isPredEdge + " " + pos[childIndex] + " " + pos_index + lab_index, fv); // afm 03-14-08 --- Sibling label
        }
    }
}
/**
 * Adds context features over the observation sequence (POS or CPOS) for an
 * arc between {@code first} and {@code second}: one feature per in-between
 * token plus the surrounding-context features from addCorePosFeatures.
 */
private void addLinearFeatures(String type, String[] obsVals,
        int first, int second,
        String attachDistance,
        FeatureVector fv) {
    String leftOfFirst = first > 0 ? obsVals[first - 1] : "STR";
    String rightOfSecond = second < obsVals.length - 1 ? obsVals[second + 1] : "END";
    // When the endpoints are adjacent there is no interior token: use "MID".
    String rightOfFirst = second - first > 1 ? obsVals[first + 1] : "MID";
    String leftOfSecond = second - first > 1 ? obsVals[second - 1] : "MID";
    // feature posR posMid posL
    String pairBase = type + "PC=" + obsVals[first] + " " + obsVals[second];
    for (int mid = first + 1; mid < second; mid++) {
        String feat = pairBase + ' ' + obsVals[mid];
        add(feat, fv);
        add(feat + attachDistance, fv);
    }
    addCorePosFeatures(type + "PT", leftOfFirst, obsVals[first], rightOfFirst,
            leftOfSecond, obsVals[second], rightOfSecond, attachDistance, fv);
}
/**
 * Adds the surrounding-context POS features for an arc: conjunctions of the
 * two endpoint observations with their left/right neighbors, each emitted
 * with and without the attachment-distance suffix. The incremental
 * StringBuilder appends deliberately produce prefix-nested feature strings.
 */
private void addCorePosFeatures(String prefix,
        String leftOf1, String one, String rightOf1,
        String leftOf2, String two, String rightOf2,
        String attachDistance,
        FeatureVector fv) {
    // feature posL-1 posL posR posR+1
    add(prefix + "=" + leftOf1 + " " + one + " " + two + "*" + attachDistance, fv);
    StringBuilder feat =
            new StringBuilder(prefix + "1=" + leftOf1 + " " + one + " " + two);
    add(feat.toString(), fv);
    feat.append(' ').append(rightOf2);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "2=" + leftOf1 + " " + two + " " + rightOf2);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "3=" + leftOf1 + " " + one + " " + rightOf2);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "4=" + one + " " + two + " " + rightOf2);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    /////////////////////////////////////////////////////////////
    // Second family: inner-neighbor conjunctions ("A" prefix).
    prefix = "A" + prefix;
    // feature posL posL+1 posR-1 posR
    add(prefix + "1=" + one + " " + rightOf1 + " " + leftOf2 + "*" + attachDistance, fv);
    feat = new StringBuilder(prefix + "1=" + one + " " + rightOf1 + " " + leftOf2);
    add(feat.toString(), fv);
    feat.append(' ').append(two);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "2=" + one + " " + rightOf1 + " " + two);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "3=" + one + " " + leftOf2 + " " + two);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    feat = new StringBuilder(prefix + "4=" + rightOf1 + " " + leftOf2 + " " + two);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    ///////////////////////////////////////////////////////////////
    // Third family: four-token windows ("BA" prefix after the "A" above).
    prefix = "B" + prefix;
    //// feature posL-1 posL posR-1 posR
    feat = new StringBuilder(prefix + "1=" + leftOf1 + " " + one + " " + leftOf2 + " " + two);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
    //// feature posL posL+1 posR posR+1
    feat = new StringBuilder(prefix + "2=" + one + " " + rightOf1 + " " + two + " " + rightOf2);
    add(feat.toString(), fv);
    feat.append('*').append(attachDistance);
    add(feat.toString(), fv);
}
/**
 * Add features for two items, each with two observations, e.g. head, head
 * pos, child, and child pos.
 *
 * Emits thirteen fixed conjunctions of the four observations; each is added
 * twice, once bare and once suffixed with "*" + attachDistance. The emission
 * order matches the original hand-unrolled sequence exactly (it determines
 * alphabet index assignment).
 */
private void addTwoObsFeatures(String prefix,
        String item1F1, String item1F2,
        String item2F1, String item2F2,
        String attachDistance,
        FeatureVector fv) {
    final String[] conjunctions = {
        "2FF1=" + item1F1,
        "2FF1=" + item1F1 + " " + item1F2,
        "2FF1=" + item1F1 + " " + item1F2 + " " + item2F2,
        "2FF1=" + item1F1 + " " + item1F2 + " " + item2F2 + " " + item2F1,
        "2FF2=" + item1F1 + " " + item2F1,
        "2FF3=" + item1F1 + " " + item2F2,
        "2FF4=" + item1F2 + " " + item2F1,
        "2FF4=" + item1F2 + " " + item2F1 + " " + item2F2,
        "2FF5=" + item1F2 + " " + item2F2,
        "2FF6=" + item2F1 + " " + item2F2,
        "2FF7=" + item1F2,
        "2FF8=" + item2F1,
        "2FF9=" + item2F2,
    };
    for (String conj : conjunctions) {
        String feat = prefix + conj;
        add(feat, fv);
        add(feat + "*" + attachDistance, fv);
    }
}
/**
 * Adds labeled (dependency-relation) features anchored on a single token.
 * Does nothing when the data is unlabeled.
 *
 * @param instance      the sentence being featurized
 * @param word          token index the features are anchored on
 * @param type          dependency relation label
 * @param attR          true if the arc attaches to the right
 * @param childFeatures true when featurizing the child end of the arc
 * @param fv            feature vector collecting the results
 */
public void addLabeledFeatures(DependencyInstance instance,
                               int word,
                               String type,
                               boolean attR,
                               boolean childFeatures,
                               FeatureVector fv) {
    if (!labeled) {
        return;
    }
    String[] forms = instance.forms;
    String[] pos = instance.postags;
    // Direction marker ("RA"/"LA") combined with the head/child flag, e.g. "RA&true".
    String att = (attR ? "RA" : "LA") + "&" + childFeatures;
    String w = forms[word];
    String wP = pos[word];
    // POS context, with sentinel tags at the sentence boundaries.
    String wPm1 = word > 0 ? pos[word - 1] : "STR";
    String wPp1 = word < pos.length - 1 ? pos[word + 1] : "END";
    add("NTS1=" + type + "&" + att, fv);
    add("ANTS1=" + type, fv);
    // Pass 0 includes the attachment/direction suffix; pass 1 omits it.
    for (int pass = 0; pass < 2; pass++) {
        String suff = "&" + type + (pass < 1 ? "&" + att : "");
        add("NTH=" + w + " " + wP + suff, fv);
        add("NTI=" + wP + suff, fv);
        add("NTIA=" + wPm1 + " " + wP + suff, fv);
        add("NTIB=" + wP + " " + wPp1 + suff, fv);
        add("NTIC=" + wPm1 + " " + wP + " " + wPp1 + suff, fv);
        add("NTJ=" + w + suff, fv); //this
    }
    /* ------------------------------------------------------------------
     * Agreement features (disabled): fire agree/disagree features when
     * head and dependent share a morphological attribute, plus asymmetric
     * features for attributes with no counterpart on the other side.
     *
     * if (childFeatures && (instance.heads[word] != -1)) {
     *     String[] headFeats = instance.feats[instance.heads[word]];
     *     String[] childFeats = instance.feats[word];
     *     String hPOS = instance.cpostags[instance.heads[word]]; // head CPOS
     *     String dPOS = instance.cpostags[word];                 // dep CPOS
     *     boolean[] headAttsMatched = new boolean[headFeats.length];
     *     boolean[] depAttsMatched = new boolean[childFeats.length];
     *     for (int i = 0; i < childFeats.length; i++) {          // each dep attr
     *         for (int j = 0; j < headFeats.length; j++) {       // each head attr
     *             if (headFeats[j].contains("=") && childFeats[i].contains("=")) {
     *                 String headAtt = headFeats[j].split("=")[0];
     *                 String depAtt = childFeats[i].split("=")[0];
     *                 String headVal = headFeats[j].split("=")[1];
     *                 String depVal = childFeats[i].split("=")[1];
     *                 if (depAtt.equals(headAtt)) {              // same attribute
     *                     headAttsMatched[j] = true;
     *                     depAttsMatched[i] = true;
     *                     if (depVal.equals(headVal))            // same value -> "agrees"
     *                         add(depAtt+"_agrees & label ="+type+",head ="+hPOS+",dep ="+dPOS, fv);
     *                     else
     *                         add(depAtt+"_disagrees & label ="+type+",head ="+hPOS+",dep ="+dPOS, fv);
     *                 }
     *             }
     *         }
     *     }
     *     for (int i = 0; i < headAttsMatched.length; i++)       // unmatched head attrs
     *         if (!headAttsMatched[i]) {
     *             String headItem = headFeats[i];
     *             add("head_"+headItem+",head ="+hPOS+",dep ="+dPOS+",label ="+type, fv);
     *         }
     *     for (int i = 0; i < depAttsMatched.length; i++)        // unmatched dep attrs
     *         if (!depAttsMatched[i]) {
     *             String depItem = childFeats[i];
     *             add("dep_"+depItem+",head ="+hPOS+",dep ="+dPOS+",label ="+type, fv);
     *         }
     * }
     * ---------------------------------------------------------------- */
    if (instance.stacked) { // afm 03-11-08
        addLabeledStackedFeatures(instance, word, type, attR, childFeatures, fv);
    }
}
// afm 03-11-08
/**
 * Hook for labeled features derived from the stacked (level-1) parser's
 * predictions. Currently an intentional no-op placeholder; called from
 * addLabeledFeatures() when the instance carries stacked predictions.
 *
 * @param instance the sentence being featurized
 * @param index    token index the features would be anchored on
 * @param label    dependency relation label
 * @param attR     true if the arc attaches to the right
 * @param isChild  true when featurizing the child end of the arc
 * @param fv       feature vector that would collect the results
 */
private void addLabeledStackedFeatures(DependencyInstance instance,
                                       int index,
                                       String label,
                                       boolean attR,
                                       boolean isChild,
                                       FeatureVector fv) {
    // Add labeled stacked features here --- afm 03-11-08
}
/**
 * Adds the feature templates used in discourse parsing mode: linear
 * features over the span between {@code small} and {@code large},
 * pairwise head/child observation features, cross-products of the
 * per-token feature columns, and (optionally) relational features.
 *
 * @param instance   the instance being featurized
 * @param small      the smaller of the two token indices
 * @param large      the larger of the two token indices
 * @param headIndex  index of the head token
 * @param childIndex index of the child token
 * @param attDist    attachment direction/distance suffix appended to features
 * @param fv         feature vector collecting the results
 */
private void addDiscourseFeatures(DependencyInstance instance,
                                  int small,
                                  int large,
                                  int headIndex,
                                  int childIndex,
                                  String attDist,
                                  FeatureVector fv) {
    // Linear (in-between/context) features over surface forms and lemmas.
    addLinearFeatures("FORM", instance.forms, small, large, attDist, fv);
    addLinearFeatures("LEMMA", instance.lemmas, small, large, attDist, fv);
    // Pairwise head/child observation templates mixing forms, lemmas and tags.
    addTwoObsFeatures("HCB1", instance.forms[headIndex],
                      instance.lemmas[headIndex],
                      instance.forms[childIndex],
                      instance.lemmas[childIndex],
                      attDist, fv);
    addTwoObsFeatures("HCB2", instance.forms[headIndex],
                      instance.lemmas[headIndex],
                      instance.forms[childIndex],
                      instance.postags[childIndex],
                      attDist, fv);
    addTwoObsFeatures("HCB3", instance.forms[headIndex],
                      instance.lemmas[headIndex],
                      instance.forms[childIndex],
                      instance.cpostags[childIndex],
                      attDist, fv);
    addTwoObsFeatures("HC2", instance.forms[headIndex],
                      instance.postags[headIndex],
                      instance.forms[childIndex],
                      instance.cpostags[childIndex], attDist, fv);
    addTwoObsFeatures("HCC2", instance.lemmas[headIndex],
                      instance.postags[headIndex],
                      instance.lemmas[childIndex],
                      instance.cpostags[childIndex],
                      attDist, fv);
    //// Use this if your extra feature lists all have the same length.
    // NOTE(review): feats is indexed here as feats[i][tokenIndex], i.e.
    // feature-column-major — assumes the discourse reader stores feats
    // transposed relative to the usual [token][feature] layout; confirm
    // against the discourse DependencyReader before touching this loop.
    for (int i = 0; i < instance.feats.length; i++) {
        addLinearFeatures("F" + i, instance.feats[i], small, large, attDist, fv);
        addTwoObsFeatures("FF" + i,
                          instance.forms[headIndex],
                          instance.feats[i][headIndex],
                          instance.forms[childIndex],
                          instance.feats[i][childIndex],
                          attDist, fv);
        addTwoObsFeatures("LF" + i,
                          instance.lemmas[headIndex],
                          instance.feats[i][headIndex],
                          instance.lemmas[childIndex],
                          instance.feats[i][childIndex],
                          attDist, fv);
        addTwoObsFeatures("PF" + i,
                          instance.postags[headIndex],
                          instance.feats[i][headIndex],
                          instance.postags[childIndex],
                          instance.feats[i][childIndex],
                          attDist, fv);
        addTwoObsFeatures("CPF" + i,
                          instance.cpostags[headIndex],
                          instance.feats[i][headIndex],
                          instance.cpostags[childIndex],
                          instance.feats[i][childIndex],
                          attDist, fv);
        // NOTE(review): the "CPF" prefix is reused below for a different
        // template family (feature-column pairs); looks like a copy-paste
        // slip — templates from the two groups can collide in the alphabet.
        for (int j = i + 1; j < instance.feats.length; j++) {
            addTwoObsFeatures("CPF" + i + "_" + j,
                              instance.feats[i][headIndex],
                              instance.feats[j][headIndex],
                              instance.feats[i][childIndex],
                              instance.feats[j][childIndex],
                              attDist, fv);
        }
        // NOTE(review): j starts at 0, so the j == i combination repeats
        // the single-column pairing already added above — presumably
        // intentional redundancy, but worth confirming.
        for (int j = 0; j < instance.feats.length; j++) {
            addTwoObsFeatures("XFF" + i + "_" + j,
                              instance.forms[headIndex],
                              instance.feats[i][headIndex],
                              instance.forms[childIndex],
                              instance.feats[j][childIndex],
                              attDist, fv);
            addTwoObsFeatures("XLF" + i + "_" + j,
                              instance.lemmas[headIndex],
                              instance.feats[i][headIndex],
                              instance.lemmas[childIndex],
                              instance.feats[j][childIndex],
                              attDist, fv);
            addTwoObsFeatures("XPF" + i + "_" + j,
                              instance.postags[headIndex],
                              instance.feats[i][headIndex],
                              instance.postags[childIndex],
                              instance.feats[j][childIndex],
                              attDist, fv);
            addTwoObsFeatures("XCF" + i + "_" + j,
                              instance.cpostags[headIndex],
                              instance.feats[i][headIndex],
                              instance.cpostags[childIndex],
                              instance.feats[j][childIndex],
                              attDist, fv);
        }
    }
    // Test out relational features
    if (options.useRelationalFeatures) {
        //for (int rf_index=0; rf_index<2; rf_index++) {
        for (int rf_index = 0;
             rf_index < instance.relFeats.length;
             rf_index++) {
            // Head-to-child relational feature string for this relation index.
            String headToChild =
                "H2C" + rf_index + instance.relFeats[rf_index].getFeature(headIndex, childIndex);
            addTwoObsFeatures("RFA1",
                              instance.forms[headIndex],
                              instance.lemmas[headIndex],
                              instance.postags[childIndex],
                              headToChild,
                              attDist, fv);
            addTwoObsFeatures("RFA2",
                              instance.postags[headIndex],
                              instance.cpostags[headIndex],
                              instance.forms[childIndex],
                              headToChild,
                              attDist, fv);
            addTwoObsFeatures("RFA3",
                              instance.lemmas[headIndex],
                              instance.postags[headIndex],
                              instance.forms[childIndex],
                              headToChild,
                              attDist, fv);
            addTwoObsFeatures("RFB1",
                              headToChild,
                              instance.postags[headIndex],
                              instance.forms[childIndex],
                              instance.lemmas[childIndex],
                              attDist, fv);
            addTwoObsFeatures("RFB2",
                              headToChild,
                              instance.forms[headIndex],
                              instance.postags[childIndex],
                              instance.cpostags[childIndex],
                              attDist, fv);
            addTwoObsFeatures("RFB3",
                              headToChild,
                              instance.forms[headIndex],
                              instance.lemmas[childIndex],
                              instance.postags[childIndex],
                              attDist, fv);
        }
    }
}
/**
 * Computes and caches the feature vectors and model scores for every
 * possible attachment decision in the instance: one entry per word pair
 * and direction, plus (for labeled, non-separate-label models) one entry
 * per word, relation type, direction and head/child role.
 *
 * @param instance the sentence to featurize
 * @param fvs      out: fvs[w1][w2][ph] unlabeled attachment feature vectors
 * @param probs    out: probs[w1][w2][ph] corresponding model scores
 * @param nt_fvs   out: nt_fvs[w][type][ph][ch] labeled feature vectors
 * @param nt_probs out: nt_probs[w][type][ph][ch] corresponding model scores
 * @param params   current model parameters used for scoring
 */
public void fillFeatureVectors(DependencyInstance instance,
                               FeatureVector[][][] fvs,
                               double[][][] probs,
                               FeatureVector[][][][] nt_fvs,
                               double[][][][] nt_probs, Parameters params) {
    final int instanceLength = instance.length();
    // Unlabeled attachment features for every ordered word pair.
    for (int w1 = 0; w1 < instanceLength; w1++) {
        for (int w2 = w1 + 1; w2 < instanceLength; w2++) {
            for (int ph = 0; ph < 2; ph++) {
                // ph == 0: right attachment (w1 heads w2); ph == 1: left.
                boolean attR = ph == 0;
                FeatureVector prodFV = new FeatureVector();
                addCoreFeatures(instance, w1, w2, attR, prodFV);
                fvs[w1][w2][ph] = prodFV;
                probs[w1][w2][ph] = params.getScore(prodFV);
            }
        }
    }
    // Labeled features, skipped when labels are scored separately.
    if (labeled && !separateLab) { // afm 06-03-08
        for (int w1 = 0; w1 < instanceLength; w1++) {
            for (int t = 0; t < types.length; t++) {
                String type = types[t];
                for (int ph = 0; ph < 2; ph++) {
                    boolean attR = ph == 0;
                    for (int ch = 0; ch < 2; ch++) {
                        // ch == 0: word acts as child; ch == 1: as head.
                        boolean child = ch == 0;
                        FeatureVector prodFV = new FeatureVector();
                        addLabeledFeatures(instance, w1,
                                           type, attR, child, prodFV);
                        nt_fvs[w1][t][ph][ch] = prodFV;
                        nt_probs[w1][t][ph][ch] = params.getScore(prodFV);
                    }
                }
            }
        }
    }
}
/**
 * Writes an instance's cached feature vectors, then the instance itself,
 * to an object stream for later reading by {@link #readInstance}.
 * Sections are delimited with sentinel ints (-3, -4, -1) that the reader
 * verifies; the write order here must stay in sync with readInstance().
 */
protected void writeInstance(DependencyInstance instance, ObjectOutputStream out) {
    int instanceLength = instance.length();
    try {
        // Unlabeled attachment feature vectors, one per word pair and direction.
        for (int w1 = 0; w1 < instanceLength; w1++) {
            for (int w2 = w1 + 1; w2 < instanceLength; w2++) {
                for (int ph = 0; ph < 2; ph++) {
                    boolean attR = ph == 0;
                    FeatureVector prodFV = new FeatureVector();
                    addCoreFeatures(instance, w1, w2, attR, prodFV);
                    out.writeObject(prodFV.keys());
                }
            }
        }
        out.writeInt(-3); // end-of-section marker
        if (labeled && !separateLab) { // afm 06-03-08
            // Labeled feature vectors: word x type x direction x head/child role.
            for (int w1 = 0; w1 < instanceLength; w1++) {
                for (int t = 0; t < types.length; t++) {
                    String type = types[t];
                    for (int ph = 0; ph < 2; ph++) {
                        boolean attR = ph == 0;
                        for (int ch = 0; ch < 2; ch++) {
                            boolean child = ch == 0;
                            FeatureVector prodFV = new FeatureVector();
                            addLabeledFeatures(instance, w1,
                                               type, attR, child, prodFV);
                            out.writeObject(prodFV.keys());
                        }
                    }
                }
            }
            out.writeInt(-3);
        }
        writeExtendedFeatures(instance, out); // subclass hook
        out.writeObject(instance.fv.keys());
        out.writeInt(-4);
        out.writeObject(instance);
        out.writeInt(-1);
        // Clear the stream's shared-reference cache so serialized instances
        // do not accumulate in memory across writes.
        out.reset();
    } catch (IOException e) {
        // Previously swallowed silently; report so a corrupt forest file is
        // not produced without any warning.
        DependencyParser.out.println("Error writing instance: " + e.getMessage());
    }
}
/**
 * Override this method if you have extra features that need to be written
 * to disk. For the basic DependencyPipe, nothing happens.
 *
 * Called by writeInstance() after the per-arc vectors and before the
 * instance-level vector; a subclass overriding this presumably also needs
 * a matching read on the readInstance() side — confirm in the subclass.
 *
 * @param instance the instance whose extra features should be written
 * @param out      the stream currently being written by writeInstance()
 * @throws IOException if writing to the stream fails
 */
protected void writeExtendedFeatures(DependencyInstance instance, ObjectOutputStream out)
    throws IOException {
}
/**
 * Reads an instance previously serialized by {@link #writeInstance},
 * rebuilding the cached feature vectors and scores in the supplied arrays.
 * Section order and the sentinel markers (-3, -4, -1) must mirror the
 * writer exactly; any mismatch aborts the process.
 *
 * @param in       stream positioned at the start of an instance record
 * @param length   number of tokens in the instance
 * @param fvs      out: fvs[w1][w2][ph] unlabeled attachment feature vectors
 * @param probs    out: corresponding model scores
 * @param nt_fvs   out: nt_fvs[w][type][ph][ch] labeled feature vectors
 * @param nt_probs out: corresponding model scores
 * @param params   current model parameters used for scoring
 * @return the deserialized instance with its feature vector attached
 * @throws IOException if reading from the stream fails
 */
public DependencyInstance readInstance(ObjectInputStream in,
                                       int length,
                                       FeatureVector[][][] fvs,
                                       double[][][] probs,
                                       FeatureVector[][][][] nt_fvs,
                                       double[][][][] nt_probs,
                                       Parameters params) throws IOException {
    try {
        // Unlabeled attachment vectors, mirroring writeInstance()'s order.
        for (int w1 = 0; w1 < length; w1++) {
            for (int w2 = w1 + 1; w2 < length; w2++) {
                for (int ph = 0; ph < 2; ph++) {
                    FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
                    fvs[w1][w2][ph] = prodFV;
                    probs[w1][w2][ph] = params.getScore(prodFV);
                }
            }
        }
        checkMarker(in.readInt(), -3);
        if (labeled && !separateLab) { // afm 06-04-08
            // Labeled vectors: word x type x direction x head/child role.
            for (int w1 = 0; w1 < length; w1++) {
                for (int t = 0; t < types.length; t++) {
                    for (int ph = 0; ph < 2; ph++) {
                        for (int ch = 0; ch < 2; ch++) {
                            FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
                            nt_fvs[w1][t][ph][ch] = prodFV;
                            nt_probs[w1][t][ph][ch] = params.getScore(prodFV);
                        }
                    }
                }
            }
            checkMarker(in.readInt(), -3);
        }
        FeatureVector nfv = new FeatureVector((int[]) in.readObject());
        checkMarker(in.readInt(), -4);
        DependencyInstance marshalledDI = (DependencyInstance) in.readObject();
        marshalledDI.setFeatureVector(nfv);
        checkMarker(in.readInt(), -1);
        return marshalledDI;
    } catch (ClassNotFoundException e) {
        DependencyParser.out.println("Error reading file.");
        System.exit(0);
    }
    // this won't happen, but it takes care of compilation complaints
    return null;
}

/** Verifies a section marker read from the forest file; aborts on mismatch. */
private static void checkMarker(int actual, int expected) {
    if (actual != expected) {
        DependencyParser.out.println("Error reading file.");
        // NOTE(review): exits with status 0 even on failure — preserved for
        // compatibility with existing scripts, but nonzero would be conventional.
        System.exit(0);
    }
}
/**
 * Get features for stems the old way. The only way this differs from
 * calling addTwoObsFeatures() is that it checks the lengths of the full
 * lexical items are greater than 5 before adding lexicalized features.
 *
 * @param hLemma  head stem/lemma
 * @param headP   head POS tag
 * @param cLemma  child stem/lemma
 * @param childP  child POS tag
 * @param attDist attachment direction/distance suffix
 * @param hL      length of the head's full lexical form
 * @param cL      length of the child's full lexical form
 * @param fv      feature vector collecting the results
 */
private void addOldMSTStemFeatures(String hLemma, String headP,
                                   String cLemma, String childP, String attDist,
                                   int hL, int cL, FeatureVector fv) {
    String all = hLemma + " " + headP + " " + cLemma + " " + childP;
    String hPos = headP + " " + cLemma + " " + childP;
    String cPos = hLemma + " " + headP + " " + childP;
    String hP = headP + " " + cLemma;
    String cP = hLemma + " " + childP;
    String oLex = hLemma + " " + cLemma;
    // (An unused "oPos" = headP + " " + childP local was removed; no
    // template ever referenced it.)
    add("SA=" + all + attDist, fv); //this
    add("SF=" + oLex + attDist, fv); //this
    add("SAA=" + all, fv); //this
    add("SFF=" + oLex, fv); //this
    // Child-lexicalized templates only when the child form is long enough.
    if (cL > 5) {
        add("SB=" + hPos + attDist, fv);
        add("SD=" + hP + attDist, fv);
        add("SK=" + cLemma + " " + childP + attDist, fv);
        add("SM=" + cLemma + attDist, fv); //this
        add("SBB=" + hPos, fv);
        add("SDD=" + hP, fv);
        add("SKK=" + cLemma + " " + childP, fv);
        add("SMM=" + cLemma, fv); //this
    }
    // Head-lexicalized templates only when the head form is long enough.
    if (hL > 5) {
        add("SC=" + cPos + attDist, fv);
        add("SE=" + cP + attDist, fv);
        add("SH=" + hLemma + " " + headP + attDist, fv);
        add("SJ=" + hLemma + attDist, fv); //this
        add("SCC=" + cPos, fv);
        add("SEE=" + cP, fv);
        add("SHH=" + hLemma + " " + headP, fv);
        add("SJJ=" + hLemma, fv); //this
    }
}
}