package org.ansj.test; import java.io.BufferedReader; import java.io.IOException; import java.util.TreeMap; import org.ansj.domain.AnsjItem; import org.nlpcn.commons.lang.dat.DATMaker; import org.nlpcn.commons.lang.dat.Item; import org.nlpcn.commons.lang.util.IOUtil; /** * 构建核心词典 * * @author ansj * */ public class CoreLibraryMaker { public static void main(String[] args) throws Exception { makeDic(); DATMaker datM = new DATMaker(); datM.maker("train_file/library.txt", AnsjItem.class); Item[] dat = datM.getDAT(); insertToArray(dat, '%', (byte) 5, "{nb=1}"); insertToArray(dat, '.', (byte) 5, "{nb=1}"); for (int i = '0'; i <= '9'; i++) { insertToArray(dat, (char) i, (byte) 5, "{nb=1}"); } for (int i = '0'; i <= '9'; i++) { insertToArray(dat, (char) i, (byte) 5, "{nb=1}"); } insertToArray(dat, '\'', (byte) 4, "{en=1}"); for (int i = 'a'; i <= 'z'; i++) { insertToArray(dat, (char) i, (byte) 4, "{en=1}"); } for (int i = 'a'; i <= 'z'; i++) { insertToArray(dat, (char) i, (byte) 4, "{en=1}"); } for (int i = 'A'; i <= 'Z'; i++) { insertToArray(dat, (char) i, (byte) 4, "{en=1}"); } for (int i = 'A'; i <= 'Z'; i++) { insertToArray(dat, (char) i, (byte) 4, "{en=1}"); } datM.saveText("src/main/resources/core.dic"); } private static void insertToArray(Item[] dat, char c, byte status, String param) { AnsjItem ansjItem1 = new AnsjItem(); ansjItem1.setName(String.valueOf(c)); ansjItem1.setIndex(c); ansjItem1.setCheck(-1); ansjItem1.setStatus(status); ansjItem1.param = param; dat[c] = ansjItem1; } public static void makeDic() throws NumberFormatException, IOException { BufferedReader br = IOUtil.getReader("train_file/dic.txt", "utf-8"); String temp = null; TreeMap<String, TreeMap<String, Integer>> dic = new TreeMap<String, TreeMap<String, Integer>>(); while ((temp = br.readLine()) != null) { if (temp.indexOf('#') > -1) { continue; } temp = temp.replace(String.valueOf(((char) 0)), ""); String[] split = temp.split("\t"); if (dic.containsKey(split[1])) { if (dic.get(split[1]).containsKey(split[0])) { System.out.println("err"); } dic.get(split[1]).put(split[0], Integer.parseInt(split[2])); } else { TreeMap<String, Integer> tm = new TreeMap<String, Integer>(); tm.put(split[0], Integer.parseInt(split[2])); dic.put(split[1], tm); } } IOUtil.writeMap(dic, "train_file/library.txt", IOUtil.UTF8); } }