package edu.fudan.ml.types.alphabet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import edu.fudan.util.hash.AbstractHashCode;
import edu.fudan.util.hash.MurmurHash;
import gnu.trove.impl.hash.TIntHash;
import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.map.hash.TIntIntHashMap;
/**
* 特征词典
* @author Feng Ji
*
*/
public final class HashFeatureAlphabet implements IFeatureAlphabet {
private static final long serialVersionUID = -6187935479742068611L;
AbstractHashCode hashcode = new MurmurHash();
private Map<String, String> keyMap;
transient Map<Integer, HashSet<String>> map = new HashMap<Integer, HashSet<String>>();
transient int count = 0;
/**
* 数据
*/
protected TIntIntHashMap intdata;
/**
* 是否冻结
*/
protected boolean frozen;
/**
* 最后一个特征的位置
*/
private int last;
public HashFeatureAlphabet() {
intdata = new TIntIntHashMap(DEFAULT_CAPACITY,DEFAULT_LOAD_FACTOR,noEntryValue,noEntryValue);
frozen = false;
last = 0;
}
@Override
public int lookupIndex(String str) {
return lookupIndex(str, 1);
}
@Override
public int lookupIndex(String str, int indent) {
String s = checkKeyMap(str);
int code = hashcode.hashcode(s);
if(!frozen){
if (!map.containsKey(code))
{
HashSet<String> hashset = new HashSet<String>();
hashset.add(s);
count++;
map.put(code, hashset);
}else{
HashSet<String> hashset = map.get(code);
if (!hashset.contains(s)) {
count++;
hashset.add(s);
}
}
}
return lookupIndex(code, indent);
}
private String checkKeyMap(String s) {
if (keyMap == null)
return s;
else if (keyMap.containsKey(s))
return keyMap.get(s);
else
return s;
}
public int lookupIndex(int code, int indent) {
if (indent < 1)
throw new IllegalArgumentException(
"Invalid Argument in FeatureAlphabet: " + indent);
int ret = intdata.get(code);
if (ret==-1 && !frozen) {//字典中没有,并且允许插入
synchronized (this) {
intdata.put(code, last);
ret = last;
last += indent;
}
}
return ret;
}
@Override
public int size() {
return last;
}
@Override
public int keysize() {
return intdata.size();
}
@Override
public int nonZeroSize() {
return this.intdata.size();
}
@Override
public boolean hasIndex(int id) {
return intdata.containsValue(id);
}
public int remove(String s) {
String str = checkKeyMap(s);
int code = hashcode.hashcode(str);
int ret = -1;
if (intdata.containsKey(code)) {
ret = intdata.remove(code);
}
return ret;
}
public boolean adjust(String s, int adjust) {
String str = checkKeyMap(s);
int code = hashcode.hashcode(str);
return intdata.adjustValue(code, adjust);
}
public void clear() {
intdata.clear();
last=0;
frozen = false;
}
public void countConflict() {
int conflict = 0;
Iterator it = map.entrySet().iterator();
while (it.hasNext()) {
Map.Entry entry = (Map.Entry)it.next();
HashSet<String> hashset = (HashSet<String>)entry.getValue();
conflict += (hashset.size() - 1);
// if(hashset.size() >1)
// System.out.println(hashset);
}
System.out.println(conflict + " / " + count + " = " + (double)conflict/(double)count);
map.clear();
map =null;
}
@Override
public boolean isStopIncrement() {
return frozen;
}
@Override
public void setStopIncrement(boolean b) {
frozen = b;
}
@Override
public TIntHash toInverseIndexMap() {
// TODO Auto-generated method stub
return null;
}
@Override
public TIntIntIterator iterator() {
return intdata.iterator();
}
/**
* @param keyMap the keyMap to set
*/
public void setKeyMap(Map<String, String> keyMap) {
this.keyMap = keyMap;
}
}