package edu.fudan.ml.types.alphabet; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.map.hash.TObjectIntHashMap; import gnu.trove.set.TIntSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * 标记词典,以自增的方式存放标记 * @version 1.0 * */ public final class LabelAlphabet implements IAlphabet { private static final long serialVersionUID = 2877624165731267884L; /** * 数据 */ protected TObjectIntHashMap<String> data; /** * 标记索引 */ private TIntObjectHashMap<String> index; /** * 是否冻结 */ protected boolean frozen; public LabelAlphabet() { data = new TObjectIntHashMap<String>(DEFAULT_CAPACITY,DEFAULT_LOAD_FACTOR,noEntryValue); frozen = false; index = new TIntObjectHashMap<String>(); } @Override public int size() { return index.size(); } /** * 查找标记的索引编号 * @param str 标记 * @return 索引编号 */ @Override public int lookupIndex(String str) { int ret = data.get(str); if (ret ==-1 && !frozen) { ret = index.size(); data.put(str, ret); index.put(ret, str); } return ret; } /** * 查找索引编号对应的标记 * @param id 索引编号 * @return 标记 */ public String lookupString(int id) { return index.get(id); } /** * 查找一组编号对应的标记 * @param ids 索引编号数组 * @return 标记数组 */ public String[] lookupString(int[] ids) { String[] vals = new String[ids.length]; for(int i = 0; i < ids.length; i++) { vals[i] = index.get(ids[i]); } return vals; } public TIntSet toTSet() { return index.keySet(); } public int[] toArray() { return index.keys(); } /** * 得到索引集合 * @return */ public Set<Integer> getIndexSet() { Set<Integer> set = new HashSet<Integer>(); for (TObjectIntIterator<String> it = data.iterator(); it.hasNext();) { it.advance(); set.add(it.value()); } return set; } public Map<String,Integer> toMap() { HashMap<String,Integer> map = new HashMap<String,Integer>(); for (TObjectIntIterator<String> it = data.iterator(); it.hasNext();) { it.advance(); map.put(it.key(), it.value()); } return map; } /** * 得到标签集合 */ public Set<String> toSet() { Set<String> set = new HashSet<String>(); for (TObjectIntIterator<String> it = data.iterator(); it.hasNext();) { it.advance(); set.add(it.key()); } return set; } /** * 将标签集合输出为字符串 */ public String toString() { StringBuffer sb = new StringBuffer(); for (TObjectIntIterator<String> it = data.iterator(); it.hasNext();) { it.advance(); sb.append(it.key()); sb.append(","); } return sb.toString(); } /** * 恢复成新字典 */ public void clear() { index.clear(); data.clear(); } @Override public boolean isStopIncrement() { return frozen; } @Override public void setStopIncrement(boolean b) { this.frozen = b; } }