package com.maalaang.omtwitter.ml;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
/**
* @author Sangwon Park
*/
public class TokenWithPosSequence extends Pipe {
private static final long serialVersionUID = 13483853094L;
private static final int SERIAL_VERSION = 9879283;
public TokenWithPosSequence(boolean targetProcessing) throws IOException, ClassNotFoundException {
super (null, new LabelAlphabet());
this.setTargetProcessing(targetProcessing);
}
public Instance pipe(Instance instance) {
LabelSequence target = null;
TokenSequence ts = new TokenSequence ();
String[][] data = ((String[][]) instance.getData());
String[] tokens = data[0];
String[] posTags = data[1];
if (isTargetProcessing()) {
String[] labels = data[2];
target = new LabelSequence((LabelAlphabet)getTargetAlphabet(), tokens.length);
for (int i = 0; i < tokens.length; i++) {
Token token = new Token(tokens[i]);
token.setFeatureValue("POS-" + posTags[i], 1.0);
ts.add(token);
target.add(labels[i]);
}
} else {
for (int i = 0; i < tokens.length; i++) {
Token token = new Token(tokens[i]);
token.setFeatureValue("POS-" + posTags[i], 1.0);
ts.add(token);
}
}
instance.setData(ts);
if (isTargetProcessing()) {
instance.setTarget(target);
}
return instance;
}
private void writeObject(ObjectOutputStream out) throws IOException {
out.defaultWriteObject();
out.writeInt(SERIAL_VERSION);
}
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
in.readInt();
}
}