package edu.fudan.nlp.pipe.seq.templet;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * A group of feature templates for sequence labeling; the individual templates
 * may generate features in different ways.
 *
 * <p>The group is itself the list of its templates. After templates have been
 * added (directly or via {@link #load(String)} / {@link #load_pro(String)}),
 * {@link #calc(int)} builds the lookup tables {@link #base} and {@link #offset}.
 *
 * @author xpqiu
 */
public class TempletGroup extends ArrayList<Templet> {
    private static final long serialVersionUID = 115444082750769279L;

    /**
     * Next template id; incremented once for every template created by the
     * load methods.
     */
    public int gid;

    /**
     * Powers of the label count: {@code base[i] == numLabels^i} for
     * {@code i} in {@code [0, maxOrder + 1]}. Filled by {@link #calc(int)}.
     */
    public int[] base;

    /**
     * Largest template order seen by {@link #calc(int)}. It is never reset, so
     * it only grows across repeated {@code calc} calls.
     */
    public int maxOrder;

    /**
     * Number of combined states: {@code numStates = numLabels^(maxOrder+1)}.
     */
    public int numStates;

    /**
     * Order of each template, indexed by template position in this list.
     */
    int[] orders;

    /**
     * {@code offset[t][s]} is the relative offset of template {@code t} for the
     * combined state {@code s}. Filled by {@link #calc(int)}.
     */
    public int[][] offset;

    /**
     * Creates an empty group whose template ids start at 0.
     */
    public TempletGroup() {
        super();
        gid = 0;
    }

    /**
     * Reads templates from a UTF-8 text file, creating one {@code BaseTemplet}
     * per line. Empty lines and lines starting with {@code '#'} are skipped.
     *
     * @param file path of the template file
     * @throws Exception if the file cannot be read or a template cannot be
     *         built; the underlying failure is attached as the cause
     */
    public void load(String file) throws Exception {
        loadTemplets(file, false);
    }

    /**
     * Same as {@link #load(String)} but creates a {@code ProTemplet} per line.
     *
     * @param file path of the template file
     * @throws Exception if the file cannot be read or a template cannot be
     *         built; the underlying failure is attached as the cause
     */
    public void load_pro(String file) throws Exception {
        loadTemplets(file, true);
    }

    /**
     * Shared implementation of the two load methods.
     *
     * @param file path of the template file
     * @param pro  {@code true} to build {@code ProTemplet}s, {@code false} for
     *             {@code BaseTemplet}s
     * @throws Exception wrapping any failure while reading or parsing
     */
    private void loadTemplets(String file, boolean pro) throws Exception {
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
            BufferedReader lbin = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String str;
            while ((str = lbin.readLine()) != null) {
                // Skip blank lines and '#' comment lines.
                if (str.length() == 0 || str.charAt(0) == '#') {
                    continue;
                }
                if (pro) {
                    add(new ProTemplet(gid++, str));
                } else {
                    add(new BaseTemplet(gid++, str));
                }
            }
        } catch (Exception e) {
            // Keep the historical exception type and message, but preserve the cause.
            throw new Exception("读入模板错误", e);
        } finally {
            // Always release the file handle, including on failure paths.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing an input stream fails.
                }
            }
        }
    }

    /**
     * Computes {@link #orders}, {@link #maxOrder}, {@link #base},
     * {@link #numStates} and the {@link #offset} table for the templates
     * currently in this group.
     *
     * <p>Each combined state {@code s} is decoded into {@code maxOrder + 1}
     * base-{@code numLabels} digits (least significant first). For template
     * {@code t}, the digits selected by {@code -vars[i]} are then re-encoded,
     * in the order of {@code getVars()}, into {@code offset[t][s]}.
     *
     * @param numLabels number of labels
     * @throws IllegalArgumentException if {@code numLabels^(maxOrder+1)} does
     *         not fit in an {@code int}
     */
    public void calc(int numLabels) {
        // Determine the order of every template and the maximum order.
        int numTemplets = size();
        this.orders = new int[numTemplets];
        for (int j = 0; j < numTemplets; j++) {
            this.orders[j] = get(j).getOrder();
            if (orders[j] > maxOrder) {
                maxOrder = orders[j];
            }
        }

        // Powers of numLabels, computed in long so a wrap-around is detected
        // instead of silently producing a wrongly sized table.
        int[] powers = new int[maxOrder + 2];
        powers[0] = 1;
        for (int i = 1; i < powers.length; i++) {
            long next = (long) powers[i - 1] * numLabels;
            if (next > Integer.MAX_VALUE) {
                throw new IllegalArgumentException(
                        "State space too large: numLabels=" + numLabels
                                + ", maxOrder=" + maxOrder);
            }
            powers[i] = (int) next;
        }
        base = powers;
        this.numStates = base[maxOrder + 1];

        offset = new int[numTemplets][numStates];
        // Scratch buffer holding the per-position label of one combined state.
        int[] bits = new int[maxOrder + 1];
        for (int t = 0; t < numTemplets; t++) {
            // vars are negated to index bits, so each -vars[i] must lie in [0, maxOrder].
            int[] vars = this.get(t).getVars();
            for (int s = 0; s < numStates; s++) {
                // Decode the combined state into its member labels.
                int d = s;
                for (int i = 0; i < bits.length; i++) {
                    bits[i] = d % numLabels;
                    d = d / numLabels;
                }
                // Re-encode only the positions this template refers to.
                int v = 0;
                for (int i = 0; i < vars.length; i++) {
                    v = v * numLabels + bits[-vars[i]];
                }
                offset[t][s] = v;
            }
        }
    }

    /**
     * Recomputes the order of every template, stores the result in
     * {@link #orders} and returns that same array (not a copy).
     *
     * @return the order of each template, indexed by template position
     */
    public int[] getOrders() {
        orders = new int[this.size()];
        for (int i = 0; i < orders.length; i++) {
            orders[i] = this.get(i).getOrder();
        }
        return orders;
    }

    /**
     * Returns the positions of all templates whose order equals {@code o}.
     *
     * @param o the order to look for
     * @return indices into this list, in ascending order; empty if none match
     */
    public int[] getOrders(int o) {
        // First pass: count matches so the result array can be sized exactly.
        int cnt = 0;
        for (int i = 0; i < this.size(); i++) {
            if (get(i).getOrder() == o) {
                cnt++;
            }
        }
        // Second pass: collect the matching indices.
        int[] ret = new int[cnt];
        for (int i = 0, j = 0; i < this.size(); i++) {
            if (get(i).getOrder() == o) {
                ret[j++] = i;
            }
        }
        return ret;
    }
}