package com.antbrains;

import gnu.trove.iterator.TObjectIntIterator;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

import com.antbrains.crf.CompactedTroveFeatureDict;
import com.antbrains.crf.FeatureDict;
import com.antbrains.crf.TroveFeatureDict;
import com.antbrains.crf.hadoop.FileTools;

/**
 * Manual consistency check for {@link CompactedTroveFeatureDict}.
 *
 * <p>Every line of a word list is turned into a key of the form {@code p<digit>:<line>}
 * (the digit comes from a fixed-seed {@link Random}, so runs are reproducible), inserted
 * into the dictionary under test and mirrored into a plain Trove map. The dictionary is
 * then checked three ways: by size, by iterating its entries, and by looking every key up
 * again. Problems are printed to standard output; the process exit status is not affected.
 */
public class TestFeatureDict {

  /** Word list used when no path is given on the command line. */
  private static final String DEFAULT_WORD_LIST = "/usr/share/dict/words";

  /**
   * Runs the check.
   *
   * @param args optional; {@code args[0]} is the path of a UTF-8 text file with one entry
   *     per line. Defaults to {@value #DEFAULT_WORD_LIST} when absent.
   * @throws IOException if the word list cannot be read
   */
  public static void main(String[] args) throws IOException {
    String dict = (args != null && args.length > 0) ? args[0] : DEFAULT_WORD_LIST;

    Random rnd = new Random(0);
    // NOTE(review): 10240 is presumably an initial capacity -- confirm against
    // CompactedTroveFeatureDict's constructor.
    FeatureDict fd = new CompactedTroveFeatureDict(10240);
    List<String> lines = FileTools.readFile2List(dict, "UTF8");

    // Reference map; -1 is its "no entry" value so a miss is distinguishable from id 0.
    TObjectIntHashMap<String> map = new TObjectIntHashMap<String>(100, 0.8f, -1);
    List<String> keys = new ArrayList<String>();
    for (String line : lines) {
      // The random prefix is drawn for every line, duplicates included, so the key
      // sequence matches earlier runs of this check.
      String key = "p" + rnd.nextInt(10) + ":" + line;
      if (map.containsKey(key)) {
        // A repeated line that drew the same prefix cannot add a new dictionary entry;
        // counting it would make every later expectation wrong.
        continue;
      }
      // This check expects the dictionary to hand out ids in insertion order, starting at 0.
      map.put(key, keys.size());
      keys.add(key);
      fd.get(key, true);
    }

    int failures = 0;

    // 1. Size must equal the number of distinct keys inserted.
    if (fd.size() != keys.size()) {
      System.out.println("size not equal");
      failures++;
    }

    // 2. Every entry the dictionary iterates over must match the reference map.
    int visited = 0;
    TObjectIntIterator<String> iter = fd.iterator();
    while (iter.hasNext()) {
      iter.advance();
      String k = iter.key();
      if (!map.containsKey(k)) {
        System.out.println("not found: " + k);
        failures++;
      } else if (map.get(k) != iter.value()) {
        System.out.println("value not equals");
        failures++;
      }
      visited++;
    }
    if (visited != keys.size()) {
      System.out.println("wrong");
      failures++;
    }

    // 3. Every inserted key must resolve, without insertion, to its expected id.
    for (int expected = 0; expected < keys.size(); expected++) {
      String key = keys.get(expected);
      int v = fd.get(key, false);
      if (v == -1) {
        // This check treats -1 as the dictionary's "absent" result.
        System.out.println("key not found: " + key);
        failures++;
      } else if (v != expected) {
        System.out.println("value not equal: " + key);
        failures++;
      }
    }

    // Always say something so a clean run is distinguishable from one that did no work.
    System.out.println("checked " + keys.size() + " keys, " + failures + " mismatches");
  }
}