package edu.fudan.ml.types.alphabet; import edu.fudan.util.hash.AbstractHashCode; import edu.fudan.util.hash.MurmurHash; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.custom_hash.TObjectIntCustomHashMap; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.strategy.HashingStrategy; /** * 特征词典 * @author Feng Ji * */ public class StringFeatureAlphabet implements IFeatureAlphabet{ private static final long serialVersionUID = -6187935479742068611L; /** * 数据 */ protected TObjectIntCustomHashMap<String> data; /** * 是否冻结 */ protected boolean frozen; /** * 最后一个特征的位置 */ private int last; public StringFeatureAlphabet() { data = new TObjectIntCustomHashMap<String>(new HashingStrategy<String>() { AbstractHashCode hash = new MurmurHash(); @Override public int computeHashCode(String object) { return hash.hashcode(object); } @Override public boolean equals(String o1, String o2) { // return (o1.charAt(0)==o2.charAt(0)); return o1.equals(o2); } }, DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR, noEntryValue); frozen = false; last = 0; } @Override public int lookupIndex(String str) { return lookupIndex(str, 1); } /** * 查询字符串索引编号 * @param str 字符串 * @param indent 间隔 * @return 字符串索引编号,-1表示词典中不存在字符串 */ public int lookupIndex(String str, int indent) { if (indent < 1) throw new IllegalArgumentException( "Invalid Argument in FeatureAlphabet: " + indent); int ret = data.get(str); if (ret==-1 && !frozen) {//字典中没有,并且允许插入 synchronized (this) { data.put(str, last); ret = last; last += indent; } } return ret; } @Override public int size() { return last; } @Override public int keysize() { return data.size(); } @Override public int nonZeroSize() { return this.data.size(); } @Override public boolean hasIndex(int id) { return data.containsValue(id); } public int remove(String str) { int ret = -1; if (data.containsKey(str)) { ret = data.remove(str); } return ret; } public boolean adjust(String str, int adjust) { return data.adjustValue(str, adjust); } public TObjectIntIterator<String> iterator() { return data.iterator(); } public void clear() { data.clear(); last=0; frozen = false; } @Override public TIntObjectHashMap<String> toInverseIndexMap() { TIntObjectHashMap<String> index = new TIntObjectHashMap<String>(); TObjectIntIterator<String> it = data.iterator(); while (it.hasNext()) { it.advance(); String value = it.key(); int key = it.value(); index.put(key, value); } return index; } @Override public boolean isStopIncrement() { return frozen; } @Override public void setStopIncrement(boolean b) { frozen = b; } }