package org.cogroo.tools.headfinder;
import org.cogroo.tools.chunker2.ChunkerContextGenerator;
import org.cogroo.tools.chunker2.TokenTag;
public class HeadFinderContextGenerator implements ChunkerContextGenerator {
@Override
public String[] getContext(int index, TokenTag[] sequence,
String[] priorDecisions, Object[] additionalContext) {
return getContext(index, sequence, priorDecisions);
}
public String[] getContext(int index, TokenTag[] sequence, String[] priorDecisions) {
String[] toks = new String[sequence.length];
String[] tags = new String[sequence.length];
String[] chunks = new String[sequence.length];
for (int i = 0; i < sequence.length; i++) {
toks[i] = sequence[i].getToken();
String t = sequence[i].getTag();
int bar = t.indexOf("|");
tags[i] = t.substring(0, bar);
chunks[i] = t.substring(bar+1);
}
return getContext(index, toks, tags, chunks, priorDecisions);
}
@Override
public String[] getContext(int i, String[] toks, String[] tags, String[] preds) {
return getContext(i, TokenTag.create(toks, tags), preds);
}
public String[] getContext(int i, String[] toks, String[] tags, String[] chks, String[] preds) {
// Words in a 5-word window
String w_2, w_1, w0, w1, w2;
// Tags in a 5-word window
String t_2, t_1, t0, t1, t2;
// Chunks in a 5-word window
String c_2, c_1, c0, c1, c2;
// Previous predictions
String p_2, p_1;
if (i < 2) {
w_2 = "w_2=bos";
t_2 = "t_2=bos";
c_2 = "c_2=bos";
p_2 = "p_2=bos";
}
else {
w_2 = "w_2=" + toks[i - 2];
t_2 = "t_2=" + tags[i - 2];
c_2 = "c_2=" + chks[i - 2];
p_2 = "p_2" + preds[i - 2];
}
if (i < 1) {
w_1 = "w_1=bos";
t_1 = "t_1=bos";
c_1 = "c_1=bos";
p_1 = "p_1=bos";
}
else {
w_1 = "w_1=" + toks[i - 1];
t_1 = "t_1=" + tags[i - 1];
c_1 = "c_1=" + chks[i - 1];
p_1 = "p_1=" + preds[i - 1];
}
w0 = "w0=" + toks[i];
t0 = "t0=" + tags[i];
c0 = "c0=" + chks[i];
if (i + 1 >= toks.length) {
w1 = "w1=eos";
t1 = "t1=eos";
c1 = "c1=eos";
}
else {
w1 = "w1=" + toks[i + 1];
t1 = "t1=" + tags[i + 1];
c1 = "c1=" + chks[i + 1];
}
if (i + 2 >= toks.length) {
w2 = "w2=eos";
t2 = "t2=eos";
c2 = "c2=eos";
}
else {
w2 = "w2=" + toks[i + 2];
t2 = "t2=" + tags[i + 2];
c2 = "c2=" + chks[i + 2];
}
String[] features = new String[] {
//add word features
w_2,
w_1,
w0,
w1,
w2,
w_1 + w0,
w0 + w1,
//add tag features
t_2,
t_1,
t0,
t1,
t2,
t_2 + t_1,
t_1 + t0,
t0 + t1,
t1 + t2,
t_2 + t_1 + t0,
t_1 + t0 + t1,
t0 + t1 + t2,
//add chks features
c_2,
c_1,
c0,
c1,
c2,
c_2 + c_1,
c_1 + c0,
c0 + c1,
c1 + c2,
c_2 + c_1 + c0,
c_1 + c0 + c1,
c0 + c1 + c2,
//add pred tags
p_2,
p_1,
p_2 + p_1,
//add pred and tag
p_1 + t_2,
p_1 + t_1,
p_1 + t0,
p_1 + t1,
p_1 + t2,
p_1 + t_2 + t_1,
p_1 + t_1 + t0,
p_1 + t0 + t1,
p_1 + t1 + t2,
p_1 + t_2 + t_1 + t0,
p_1 + t_1 + t0 + t1,
p_1 + t0 + t1 + t2,
//add pred and chunk
p_1 + c_2,
p_1 + c_1,
p_1 + c0,
p_1 + c1,
p_1 + c2,
p_1 + c_2 + c_1,
p_1 + c_1 + c0,
p_1 + c0 + c1,
p_1 + c1 + c2,
p_1 + c_2 + c_1 + c0,
p_1 + c_1 + c0 + c1,
p_1 + c0 + c1 + c2,
//add pred and word
p_1 + w_2,
p_1 + w_1,
p_1 + w0,
p_1 + w1,
p_1 + w2,
p_1 + w_1 + w0,
p_1 + w0 + w1,
//
t_2 + c_2,
t_1 + c_1,
t0 + c0,
t1 + c1,
t2 + c2
};
return features;
}
}