TempletGroup.java example

Explorer
fudannlp-master
package edu.fudan.nlp.pipe.seq.templet;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * 序列标注特征模板组，包含不同的特征生成方式
 * @author xpqiu
 *
 */
public class TempletGroup extends ArrayList<Templet> {
	
	private static final long serialVersionUID = 115444082750769279L;
	
	/**
	 * 模板标识
	 */
	public int gid;
	/**
	 * n阶状态空间映射数组，元素为每一阶对应的一维空间起始地址
	 * 以标记个数为进制
	 */
	public int[] base;
	/**
	 * 最大阶数
	 */
	public int maxOrder;
	/**
	 * 状态组合个数
	 * numStates = numLabels^(maxOrder+1)
	 */
	public int numStates;
	/**
	 * 模板阶数
	 */
	int[] orders;	
	
	
	/**
	 * 不同模板对应状态组合的相对偏移位置
	 */
	public int[][] offset;
	
	public TempletGroup() {
		super();
		gid = 0;
	}
	
	/**
	 * 从文件中读取
	 * @param file
	 * @throws Exception 
	 */
	public void load(String file) throws Exception {

		try {
			InputStreamReader  read = new InputStreamReader (new FileInputStream(file),"utf-8");
			BufferedReader lbin = new BufferedReader(read);
			String str;
			while((str=lbin.readLine())!=null){
				if(str.length()==0)
					continue;
				if(str.charAt(0)=='#')
					continue;
				add(new BaseTemplet(gid++, str));
			}
			lbin.close();
		} catch (IOException e1) {
			e1.printStackTrace();
			throw new Exception("读入模板错误");
		}catch (Exception e) {
			e.printStackTrace();
			throw new Exception("读入模板错误");
		}
	}
	

	public void load_pro(String file) throws Exception {

		try {
			InputStreamReader  read = new InputStreamReader (new FileInputStream(file),"utf-8");
			BufferedReader lbin = new BufferedReader(read);
			String str;
			while((str=lbin.readLine())!=null){
				if(str.length()==0)
					continue;
				if(str.charAt(0)=='#')
					continue;
				add(new ProTemplet(gid++, str));
			}
			lbin.close();
		} catch (IOException e1) {
			e1.printStackTrace();
			throw new Exception("读入模板错误");
		}catch (Exception e) {
			e.printStackTrace();
			throw new Exception("读入模板错误");
		}
	}
	/**
	 * 计算偏移位置
	 * @param numLabels 标记个数
	 */
	public void calc(int numLabels){
		//计算最大阶
		int numTemplets = size();
		this.orders = new int[numTemplets];
		for(int j=0; j<numTemplets; j++) {
			Templet t = get(j);
			this.orders[j] = t.getOrder();
			if (orders[j] > maxOrder)
				maxOrder = orders[j];
		}
		
		base = new int[maxOrder+2];
		base[0]=1;
		for(int i=1; i<base.length; i++) {
			base[i]=base[i-1]*numLabels;
		}
		this.numStates = base[maxOrder+1];
		offset = new int[numTemplets][numStates];
		
		for(int t=0; t<numTemplets; t++) {
			Templet tpl =  this.get(t);
			int[] vars = tpl.getVars();
			/**
			 * 记录每一阶的状态
			 */
			int[] bits = new int[maxOrder+1];
			int v;
			for(int s=0; s<numStates; s++) {
				int d = s;
				//对于一个n阶状态组合，计算每个成员状态
				for(int i=0; i<maxOrder+1; i++) {
					bits[i] = d%numLabels;
					d = d/numLabels;
				}
				//对于一个n阶状态组合，记录一个特征模板映射到特征空间中到基地址的偏移
				//TODO 是否可以和上面合并简化
				v = 0;						
				for(int i=0; i<vars.length; i++) {
					v = v*numLabels + bits[-vars[i]];
				}
				offset[t][s] = v;
			}
		}
	}

	public int[] getOrders()	{
		orders = new int[this.size()];
		for(int i = 0; i < orders.length; i++)	{
			orders[i] = this.get(i).getOrder();
		}
		return orders;
	}

	public int[] getOrders(int o)	{
		int cnt = 0;
		for(int i = 0; i < this.size(); i++)	{
			if (get(i).getOrder() == o)	{
				cnt++;
			}
		}
		int[] ret = new int[cnt];
		for(int i = 0, j = 0; i < this.size(); i++)	{
			if (get(i).getOrder() == o)	{
				ret[j++] = i;
			}
		}
		return ret;
	}
}