package mstparser;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
public class DependencyPipe2O extends DependencyPipe {
public DependencyPipe2O(ParserOptions options) throws IOException {
super(options);
}
@Override
protected void addExtendedFeatures(DependencyInstance instance,
FeatureVector fv) {
final int instanceLength = instance.length();
int[] heads = instance.heads;
// find all trip features
for (int i = 0; i < instanceLength; i++) {
if (heads[i] == -1 && i != 0) {
continue;
}
// right children
int prev = i;
for (int j = i + 1; j < instanceLength; j++) {
if (heads[j] == i) {
addTripFeatures(instance, i, prev, j, fv);
addSiblingFeatures(instance, prev, j, prev == i, fv);
prev = j;
}
}
prev = i;
for (int j = i - 1; j >= 0; j--) {
if (heads[j] == i) {
addTripFeatures(instance, i, prev, j, fv);
addSiblingFeatures(instance, prev, j, prev == i, fv);
prev = j;
}
}
}
}
public void fillFeatureVectors(DependencyInstance instance,
FeatureVector[][][] fvs,
double[][][] probs,
FeatureVector[][][] fvs_trips,
double[][][] probs_trips,
FeatureVector[][][] fvs_sibs,
double[][][] probs_sibs,
FeatureVector[][][][] nt_fvs,
double[][][][] nt_probs, Parameters params) {
fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params);
final int instanceLength = instance.length();
for (int w1 = 0; w1 < instanceLength; w1++) {
for (int w2 = w1; w2 < instanceLength; w2++) {
for (int w3 = w2 + 1; w3 < instanceLength; w3++) {
FeatureVector prodFV = new FeatureVector();
addTripFeatures(instance, w1, w2, w3, prodFV);
double prodProb = params.getScore(prodFV);
fvs_trips[w1][w2][w3] = prodFV;
probs_trips[w1][w2][w3] = prodProb;
}
}
for (int w2 = w1; w2 >= 0; w2--) {
for (int w3 = w2 - 1; w3 >= 0; w3--) {
FeatureVector prodFV = new FeatureVector();
addTripFeatures(instance, w1, w2, w3, prodFV);
double prodProb = params.getScore(prodFV);
fvs_trips[w1][w2][w3] = prodFV;
probs_trips[w1][w2][w3] = prodProb;
}
}
}
for (int w1 = 0; w1 < instanceLength; w1++) {
for (int w2 = 0; w2 < instanceLength; w2++) {
for (int wh = 0; wh < 2; wh++) {
if (w1 != w2) {
FeatureVector prodFV = new FeatureVector();
addSiblingFeatures(instance, w1, w2, wh == 0, prodFV);
double prodProb = params.getScore(prodFV);
fvs_sibs[w1][w2][wh] = prodFV;
probs_sibs[w1][w2][wh] = prodProb;
}
}
}
}
}
private void addSiblingFeatures(DependencyInstance instance,
int ch1, int ch2,
boolean isST,
FeatureVector fv) {
String[] forms = instance.forms;
String[] pos = instance.postags;
// ch1 is always the closes to par
String dir = ch1 > ch2 ? "RA" : "LA";
String ch1_pos = isST ? "STPOS" : pos[ch1];
String ch2_pos = pos[ch2];
String ch1_word = isST ? "STWRD" : forms[ch1];
String ch2_word = forms[ch2];
add("CH_PAIR=" + ch1_pos + "_" + ch2_pos + "_" + dir, 1.0, fv);
add("CH_WPAIR=" + ch1_word + "_" + ch2_word + "_" + dir, 1.0, fv);
add("CH_WPAIRA=" + ch1_word + "_" + ch2_pos + "_" + dir, 1.0, fv);
add("CH_WPAIRB=" + ch1_pos + "_" + ch2_word + "_" + dir, 1.0, fv);
add("ACH_PAIR=" + ch1_pos + "_" + ch2_pos, 1.0, fv);
add("ACH_WPAIR=" + ch1_word + "_" + ch2_word, 1.0, fv);
add("ACH_WPAIRA=" + ch1_word + "_" + ch2_pos, 1.0, fv);
add("ACH_WPAIRB=" + ch1_pos + "_" + ch2_word, 1.0, fv);
int dist = Math.max(ch1, ch2) - Math.min(ch1, ch2);
String distBool = "0";
if (dist > 1) {
distBool = "1";
}
if (dist > 2) {
distBool = "2";
}
if (dist > 3) {
distBool = "3";
}
if (dist > 4) {
distBool = "4";
}
if (dist > 5) {
distBool = "5";
}
if (dist > 10) {
distBool = "10";
}
add("SIB_PAIR_DIST=" + distBool + "_" + dir, 1.0, fv);
add("ASIB_PAIR_DIST=" + distBool, 1.0, fv);
add("CH_PAIR_DIST=" + ch1_pos + "_" + ch2_pos + "_" + distBool + "_" + dir, 1.0, fv);
add("ACH_PAIR_DIST=" + ch1_pos + "_" + ch2_pos + "_" + distBool, 1.0, fv);
}
private void addTripFeatures(DependencyInstance instance,
int par,
int ch1, int ch2,
FeatureVector fv) {
String[] pos = instance.postags;
// ch1 is always the closest to par
String dir = par > ch2 ? "RA" : "LA";
String par_pos = pos[par];
String ch1_pos = ch1 == par ? "STPOS" : pos[ch1];
String ch2_pos = pos[ch2];
String pTrip = par_pos + "_" + ch1_pos + "_" + ch2_pos;
add("POS_TRIP=" + pTrip + "_" + dir, 1.0, fv);
add("APOS_TRIP=" + pTrip, 1.0, fv);
}
/**
* Write out the second order features.
*
*
*/
@Override
protected void writeExtendedFeatures(DependencyInstance instance, ObjectOutputStream out)
throws IOException {
final int instanceLength = instance.length();
for (int w1 = 0; w1 < instanceLength; w1++) {
for (int w2 = w1; w2 < instanceLength; w2++) {
for (int w3 = w2 + 1; w3 < instanceLength; w3++) {
FeatureVector prodFV = new FeatureVector();
addTripFeatures(instance, w1, w2, w3, prodFV);
out.writeObject(prodFV.keys());
}
}
for (int w2 = w1; w2 >= 0; w2--) {
for (int w3 = w2 - 1; w3 >= 0; w3--) {
FeatureVector prodFV = new FeatureVector();
addTripFeatures(instance, w1, w2, w3, prodFV);
out.writeObject(prodFV.keys());
}
}
}
out.writeInt(-3);
for (int w1 = 0; w1 < instanceLength; w1++) {
for (int w2 = 0; w2 < instanceLength; w2++) {
for (int wh = 0; wh < 2; wh++) {
if (w1 != w2) {
FeatureVector prodFV = new FeatureVector();
addSiblingFeatures(instance, w1, w2, wh == 0, prodFV);
out.writeObject(prodFV.keys());
}
}
}
}
out.writeInt(-3);
}
public DependencyInstance readInstance(ObjectInputStream in,
int length,
FeatureVector[][][] fvs,
double[][][] probs,
FeatureVector[][][] fvs_trips,
double[][][] probs_trips,
FeatureVector[][][] fvs_sibs,
double[][][] probs_sibs,
FeatureVector[][][][] nt_fvs,
double[][][][] nt_probs,
Parameters params) throws IOException {
try {
// Get production crap.
for (int w1 = 0; w1 < length; w1++) {
for (int w2 = w1 + 1; w2 < length; w2++) {
for (int ph = 0; ph < 2; ph++) {
FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
double prodProb = params.getScore(prodFV);
fvs[w1][w2][ph] = prodFV;
probs[w1][w2][ph] = prodProb;
}
}
}
int last = in.readInt();
if (last != -3) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
if (labeled) {
if (!separateLab) { // afm 06-04-08
for (int w1 = 0; w1 < length; w1++) {
for (int t = 0; t < types.length; t++) {
String type = types[t];
for (int ph = 0; ph < 2; ph++) {
for (int ch = 0; ch < 2; ch++) {
FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
double nt_prob = params.getScore(prodFV);
nt_fvs[w1][t][ph][ch] = prodFV;
nt_probs[w1][t][ph][ch] = nt_prob;
}
}
}
}
last = in.readInt();
if (last != -3) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
}
}
for (int w1 = 0; w1 < length; w1++) {
for (int w2 = w1; w2 < length; w2++) {
for (int w3 = w2 + 1; w3 < length; w3++) {
FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
double prodProb = params.getScore(prodFV);
fvs_trips[w1][w2][w3] = prodFV;
probs_trips[w1][w2][w3] = prodProb;
}
}
for (int w2 = w1; w2 >= 0; w2--) {
for (int w3 = w2 - 1; w3 >= 0; w3--) {
FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
double prodProb = params.getScore(prodFV);
fvs_trips[w1][w2][w3] = prodFV;
probs_trips[w1][w2][w3] = prodProb;
}
}
}
last = in.readInt();
if (last != -3) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
for (int w1 = 0; w1 < length; w1++) {
for (int w2 = 0; w2 < length; w2++) {
for (int wh = 0; wh < 2; wh++) {
if (w1 != w2) {
FeatureVector prodFV = new FeatureVector((int[]) in.readObject());
double prodProb = params.getScore(prodFV);
fvs_sibs[w1][w2][wh] = prodFV;
probs_sibs[w1][w2][wh] = prodProb;
}
}
}
}
last = in.readInt();
if (last != -3) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
FeatureVector nfv = new FeatureVector((int[]) in.readObject());
last = in.readInt();
if (last != -4) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
DependencyInstance marshalledDI;
marshalledDI = (DependencyInstance) in.readObject();
marshalledDI.setFeatureVector(nfv);
last = in.readInt();
if (last != -1) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
return marshalledDI;
} catch (ClassNotFoundException e) {
DependencyParser.out.println("Error reading file.");
System.exit(0);
}
// this won't happen, but it takes care of compilation complaints
return null;
}
}