package edu.fudan.nlp.cn.anaphora;
import edu.fudan.ml.types.Instance;
import edu.fudan.nlp.cn.Tags;
/**
 * Feature generation used for training the model.
 *
 * <p>An instance wraps one {@link EntityGroup}, reads its two entities
 * ({@code getAhead()} and {@code getBehind()}; the inline comments of the original
 * code call them "I" and "J") and fills a 19-element vector of 0/1 features when it
 * is constructed.
 *
 * <p>Feature layout (index: condition that yields 1):
 * <ul>
 *   <li>0: {@code isSub(ahead data, behind data)} ("head-word match")</li>
 *   <li>1: ahead POS tag contains "代词" (pronoun)</li>
 *   <li>2: ahead POS tag equals "名词" (noun)</li>
 *   <li>3: {@code Tags.isEntiry(ahead POS tag)}</li>
 *   <li>4: behind text contains "他" or "她"</li>
 *   <li>5: behind text contains "它"</li>
 *   <li>6: behind text contains "我" or "你"</li>
 *   <li>7: behind text contains "自己"</li>
 *   <li>8, 9, 10: same conditions as 1, 2, 3 (see the note in {@code doFeature})</li>
 *   <li>11: sexes differ and the ahead sex is not "UNKNOW"</li>
 *   <li>12: either sex is "UNKNOW"</li>
 *   <li>13: number values differ and the ahead number is not "UNKNOW"</li>
 *   <li>14: either number value is "UNKNOW"</li>
 *   <li>15 / 16: ahead / behind grammatical tag equals "SUB"</li>
 *   <li>17 / 18: ahead / behind grammatical tag equals "OBJ"</li>
 * </ul>
 *
 * @author jszhao
 * @version 1.0
 * @since FudanNLP 1.5
 */
public class FeatureGeter {
    /** Length of the feature vector. */
    private static final int FEATURE_COUNT = 19;

    /**
     * Sentinel compared against {@code getSex()} / {@code getIsSing()}.
     * The literal is kept exactly as the original code spells it because it is
     * matched against values produced elsewhere.
     */
    private static final String UNKNOWN = "UNKNOW";

    /** POS tag fragment for pronouns. */
    private static final String PRONOUN_TAG = "代词";

    /** POS tag for nouns. */
    private static final String NOUN_TAG = "名词";

    private int[] featrue;
    private EntityGroup eGroup;
    private Instance inst;

    /**
     * Builds the features for the entity group stored as the data of {@code inst}.
     *
     * @param inst instance whose {@code getData()} is cast to {@link EntityGroup}
     */
    public FeatureGeter(Instance inst) {
        this.inst = inst;
        this.eGroup = (EntityGroup) inst.getData();
        this.featrue = new int[FEATURE_COUNT];
        this.doFeature();
    }

    /**
     * Builds the features directly from an entity group. No {@link Instance} is
     * associated in this case, so {@link #getInst()} returns {@code null}.
     *
     * @param eGroup the entity pair to compute features for
     */
    public FeatureGeter(EntityGroup eGroup) {
        this.featrue = new int[FEATURE_COUNT];
        this.eGroup = eGroup;
        this.doFeature();
    }

    /**
     * Checks whether every character of {@code str2} from index 2 onward occurs
     * somewhere in {@code str1}.
     *
     * <p>The original loop started at index 1 and reset its flag after that first
     * iteration, so the characters at index 0 and 1 of {@code str2} never influence
     * the result; that behavior is preserved here. Strings of length 2 or less
     * therefore always yield {@code true}.
     *
     * @param str1 text that is searched
     * @param str2 text whose characters (index 2 and later) are looked up
     * @return {@code true} if no character of {@code str2} at index &gt;= 2 is missing from {@code str1}
     */
    private static boolean isSub(String str1, String str2) {
        for (int i = 2; i < str2.length(); i++) {
            if (str1.indexOf(str2.charAt(i)) < 0) {
                return false;
            }
        }
        return true;
    }

    /** Maps a condition to the 0/1 encoding used in the feature vector. */
    private static int flag(boolean condition) {
        return condition ? 1 : 0;
    }

    /** Fills {@code featrue} from the two entities of {@code eGroup}. */
    private void doFeature() {
        Entity ahead = this.eGroup.getAhead();
        Entity behind = this.eGroup.getBehind();
        String aheadData = ahead.getData();
        String behindData = behind.getData();

        // Head-word match.
        featrue[0] = flag(isSub(aheadData, behindData));

        // Part of speech of I (ahead): pronoun / noun / entity.
        featrue[1] = flag(ahead.getPosTag().contains(PRONOUN_TAG));
        featrue[2] = flag(ahead.getPosTag().equals(NOUN_TAG));
        featrue[3] = flag(Tags.isEntiry(ahead.getPosTag()));

        // Which personal pronoun J (behind) contains.
        featrue[4] = flag(behindData.contains("他") || behindData.contains("她"));
        featrue[5] = flag(behindData.contains("它"));
        featrue[6] = flag(behindData.contains("我") || behindData.contains("你"));
        featrue[7] = flag(behindData.contains("自己"));

        // NOTE(review): the original code first stored "behind POS tag contains 代词"
        // in slot 8 and "behind text contains 这/那/其/该" (demonstrative) in slot 9,
        // then unconditionally overwrote both with the ahead-based values below.
        // Only the final values are kept so the vector stays identical for already
        // trained models. This looks like a copy/paste slip -- confirm the intended
        // layout before changing it, since that requires retraining.
        featrue[8] = flag(ahead.getPosTag().contains(PRONOUN_TAG));
        featrue[9] = flag(ahead.getPosTag().equals(NOUN_TAG));
        featrue[10] = flag(Tags.isEntiry(ahead.getPosTag()));

        // Gender agreement. The sentinel is compared with equals(): the previous
        // ==/!= comparison tested reference identity and only worked when the
        // getter returned the interned literal.
        featrue[11] = flag(!ahead.getSex().equals(behind.getSex())
                && !UNKNOWN.equals(ahead.getSex()));
        featrue[12] = flag(UNKNOWN.equals(ahead.getSex())
                || UNKNOWN.equals(behind.getSex()));

        // Singular/plural agreement, same scheme as gender.
        featrue[13] = flag(!ahead.getIsSing().equals(behind.getIsSing())
                && !UNKNOWN.equals(ahead.getIsSing()));
        featrue[14] = flag(UNKNOWN.equals(ahead.getIsSing())
                || UNKNOWN.equals(behind.getIsSing()));

        // Grammatical role of each entity.
        featrue[15] = flag(ahead.getGraTag().equals("SUB"));
        featrue[16] = flag(behind.getGraTag().equals("SUB"));
        featrue[17] = flag(ahead.getGraTag().equals("OBJ"));
        featrue[18] = flag(behind.getGraTag().equals("OBJ"));
    }

    /**
     * @return the instance passed to {@link #FeatureGeter(Instance)}, or {@code null}
     *         when this object was built from an {@link EntityGroup}
     */
    public Instance getInst() {
        return this.inst;
    }

    /**
     * @return the internal feature array itself (not a copy)
     */
    public int[] getFeatrue() {
        return this.featrue;
    }

    /**
     * @return the entity group the features were computed from
     */
    public EntityGroup getEgroup() {
        return this.eGroup;
    }
}