package edu.fudan.nlp.pipe.seq;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import edu.fudan.ml.types.DynamicInfo;
import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.alphabet.IFeatureAlphabet;
import edu.fudan.ml.types.alphabet.LabelAlphabet;
import edu.fudan.nlp.pipe.Pipe;
import edu.fudan.nlp.pipe.seq.templet.DynamicTemplet;
import edu.fudan.nlp.pipe.seq.templet.Templet;
/**
 * Pipe that replaces an instance's {@code String[][]} data with an
 * {@code int[][]} feature matrix. Each row holds the values produced by the
 * static templets followed by the values produced by the dynamic templets.
 *
 * <p>Labels are read from the instance's temp data as a {@code String[]}.
 * A label is split at its first {@code '-'}: the part before it is treated as
 * a position prefix (only {@code "S"} and {@code "E"} are tested here) and the
 * part after it as the label name. A label without {@code '-'} acts as both
 * prefix and name.
 *
 * <p>Side effects of {@link #addThruPipe(Instance)}: the original data is
 * stored as the instance source, the data is replaced by the feature matrix,
 * and the temp data is overwritten with the per-position label context (it is
 * left holding the context of the last processed position).
 */
public class Sequence2DynamicFeatureSequence extends Pipe {
    private static final long serialVersionUID = -5568795070369739920L;

    /** Templets evaluated before any label context is attached. */
    List<Templet> templets;
    /** Templets evaluated after the label context is stored in the temp data. */
    List<Templet> dynamicTemplets;
    /** Feature alphabet handed to every templet. */
    public IFeatureAlphabet features;
    /** Label alphabet; its current size is handed to every templet. */
    LabelAlphabet labels;

    /**
     * @param templets        static templets, filling the first columns of each row
     * @param dynamicTemplets dynamic templets, filling the remaining columns
     * @param features        feature alphabet passed to the templets
     * @param labels          label alphabet whose size is passed to the templets
     */
    public Sequence2DynamicFeatureSequence(List<Templet> templets, List<Templet> dynamicTemplets,
            IFeatureAlphabet features, LabelAlphabet labels) {
        this.templets = templets;
        this.dynamicTemplets = dynamicTemplets;
        this.features = features;
        this.labels = labels;
    }

    /**
     * Builds the feature matrix for {@code instance} and stores it as its data.
     *
     * <p>For every position the static templets are evaluated first. Then the
     * temp data is replaced by the label context of that position and the
     * dynamic templets are evaluated. When {@code DynamicTemplet.MIN == 0} the
     * context is an {@code ArrayList<DynamicInfo>}; otherwise it is a
     * {@code LinkedList} holding the backward-looking label list followed by
     * the forward-looking label list.
     *
     * @param instance instance whose data is a {@code String[][]} and whose
     *                 temp data is the {@code String[]} of labels
     * @throws Exception propagated from the templets
     */
    public void addThruPipe(Instance instance) throws Exception {
        String[][] data = (String[][]) instance.getData();
        // Read the labels before the temp data slot is reused below.
        String[] target = (String[]) instance.getTempData();
        instance.setSource(instance.getData());

        int staticCount = templets.size();
        int dynamicCount = dynamicTemplets.size();
        int[][] newData = new int[data.length][staticCount + dynamicCount];

        for (int i = 0; i < data.length; i++) {
            Arrays.fill(newData[i], -1);
            for (int j = 0; j < staticCount; j++) {
                newData[i][j] = templets.get(j).generateAt(instance,
                        this.features, i, labels.size());
            }

            // Publish the label context for position i through the temp data
            // so the dynamic templets can read it.
            if (DynamicTemplet.MIN == 0) {
                ArrayList<DynamicInfo> preLabel = getPreInfo(data, instance, i, target);
                instance.setTempData(preLabel);
            } else {
                ArrayList<String> fl = getPreLabel(instance, i, target);
                ArrayList<String> bl = getPreLabel_back(instance, i, target);
                LinkedList<ArrayList<String>> preLabel = new LinkedList<ArrayList<String>>();
                preLabel.add(fl);
                preLabel.add(bl);
                instance.setTempData(preLabel);
            }

            for (int j = 0; j < dynamicCount; j++) {
                newData[i][staticCount + j] = dynamicTemplets.get(j).generateAt(instance,
                        this.features, i, labels.size());
            }
        }
        instance.setData(newData);
    }

    /**
     * Returns the part of {@code s} after its first {@code '-'}, or {@code s}
     * itself when it contains no {@code '-'}.
     */
    private String getLabelName(String s) {
        int index = s.indexOf("-");
        return index < 0 ? s : s.substring(index + 1);
    }

    /**
     * Returns whether the prefix of {@code s} (the part before its first
     * {@code '-'}, or all of {@code s} when there is none) is {@code "E"} or
     * {@code "S"}.
     */
    private boolean isEndLabel(String s) {
        int index = s.indexOf("-");
        String prefix = index < 0 ? s : s.substring(0, index);
        return prefix.equals("E") || prefix.equals("S");
    }

    /**
     * Collects information about the words that precede position {@code p},
     * nearest first, by scanning the labels backwards from {@code p - 1}.
     *
     * <p>If the label at {@code p - 1} is an end label, an empty
     * {@code DynamicInfo("", "", 0)} is added first; otherwise the name of
     * that label is used for the still-open word. Each following entry pairs
     * a label name with the text concatenated from {@code data[l][0]} and the
     * length of that text. Scanning stops once the list holds more than
     * {@code DynamicTemplet.MAX} entries.
     *
     * @param data     token rows; only column 0 is read
     * @param instance unused
     * @param p        current position
     * @param target   labels, indexed like {@code data}
     * @return the collected entries; empty when {@code p == 0}
     */
    private ArrayList<DynamicInfo> getPreInfo(String[][] data, Instance instance, int p, String[] target) {
        ArrayList<DynamicInfo> al = new ArrayList<DynamicInfo>();
        StringBuilder word = new StringBuilder();
        String pos = null;
        if (p > 0) {
            if (isEndLabel(target[p - 1])) {
                al.add(new DynamicInfo("", "", 0));
            } else {
                pos = getLabelName(target[p - 1]);
            }
        }
        for (int l = p - 1; l >= 0; l--) {
            String s = target[l];
            if (isEndLabel(s)) {
                // An end label closes the word gathered so far (if any) and
                // opens the word that ends at position l.
                if (pos != null) {
                    // Tokens were appended last-to-first, so flip them back.
                    // NOTE(review): reversing character-wise also reverses the
                    // characters inside a multi-character data[l][0]; this
                    // presumably assumes single-character tokens - confirm.
                    word.reverse();
                    String text = word.toString();
                    al.add(new DynamicInfo(pos, text, text.length()));
                    if (al.size() > DynamicTemplet.MAX) {
                        break;
                    }
                }
                pos = getLabelName(s);
                word = new StringBuilder();
            }
            word.append(data[l][0]);
        }
        // Flush the word that was still open when the start of the sequence
        // was reached.
        if (al.size() <= DynamicTemplet.MAX && pos != null) {
            word.reverse();
            String text = word.toString();
            al.add(new DynamicInfo(pos, text, text.length()));
        }
        return al;
    }

    /**
     * Collects the names of the end labels before position {@code p}, nearest
     * first, stopping once more than {@code DynamicTemplet.MAX} were found.
     * Labels without {@code '-'} are handled like in {@link #isEndLabel} and
     * {@link #getLabelName} instead of raising an exception.
     *
     * @param instance unused
     * @param p        current position
     * @param target   labels of the sequence
     * @return the collected label names
     */
    private ArrayList<String> getPreLabel(Instance instance, int p, String[] target) {
        ArrayList<String> al = new ArrayList<String>();
        for (int l = p - 1; l >= 0; l--) {
            String s = target[l];
            if (isEndLabel(s)) {
                al.add(getLabelName(s));
            }
            if (al.size() > DynamicTemplet.MAX) {
                break;
            }
        }
        return al;
    }

    /**
     * Collects the names of the end labels from position {@code p} (inclusive)
     * onwards, stopping once more than {@code -DynamicTemplet.MIN - 1} were
     * found. Labels without {@code '-'} are handled like in
     * {@link #isEndLabel} and {@link #getLabelName} instead of raising an
     * exception.
     *
     * @param instance unused
     * @param p        current position
     * @param target   labels of the sequence
     * @return the collected label names
     */
    private ArrayList<String> getPreLabel_back(Instance instance, int p, String[] target) {
        ArrayList<String> al = new ArrayList<String>();
        for (int l = p; l < target.length; l++) {
            String s = target[l];
            if (isEndLabel(s)) {
                al.add(getLabelName(s));
            }
            if (al.size() > -DynamicTemplet.MIN - 1) {
                break;
            }
        }
        return al;
    }
}