package org.ansj.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PropertyResourceBundle;
import java.util.ResourceBundle;
import org.ansj.app.crf.SplitWord;
import org.ansj.dic.DicReader;
import org.ansj.dic.impl.Jdbc2Stream;
import org.ansj.domain.AnsjItem;
import org.ansj.exception.LibraryException;
import org.ansj.library.AmbiguityLibrary;
import org.ansj.library.CrfLibrary;
import org.ansj.library.DATDictionary;
import org.ansj.library.DicLibrary;
import org.ansj.library.StopLibrary;
import org.ansj.library.SynonymsLibrary;
import org.ansj.recognition.impl.StopRecognition;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.FileFinder;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.ObjConver;
import org.nlpcn.commons.lang.util.StringUtil;
import org.nlpcn.commons.lang.util.logging.Log;
import org.nlpcn.commons.lang.util.logging.LogFactory;
/**
* 这个类储存一些公用变量.
*
* @author ansj
*
*/
public class MyStaticValue {
public static final Log LOG = LogFactory.getLog(MyStaticValue.class);
// 是否开启人名识别
public static Boolean isNameRecognition = true;
// 是否开启数字识别
public static Boolean isNumRecognition = true;
// 是否数字和量词合并
public static Boolean isQuantifierRecognition = true;
// 是否显示真实词语
public static Boolean isRealName = false;
/**
* 是否用户辞典不加载相同的词
*/
public static boolean isSkipUserDefine = false;
public static final Map<String, String> ENV = new HashMap<>();
static {
/**
* 配置文件变量
*/
ResourceBundle rb = null;
try {
rb = ResourceBundle.getBundle("ansj_library");
} catch (Exception e) {
try {
File find = FileFinder.find("ansj_library.properties", 1);
if (find != null && find.isFile()) {
rb = new PropertyResourceBundle(IOUtil.getReader(find.getAbsolutePath(), System.getProperty("file.encoding")));
LOG.info("load ansj_library not find in classPath ! i find it in " + find.getAbsolutePath() + " make sure it is your config!");
}
} catch (Exception e1) {
LOG.warn("not find ansj_library.properties. reason: " + e1.getMessage());
}
}
if (rb == null) {
try {
rb = ResourceBundle.getBundle("library");
} catch (Exception e) {
try {
File find = FileFinder.find("library.properties", 2);
if (find != null && find.isFile()) {
rb = new PropertyResourceBundle(IOUtil.getReader(find.getAbsolutePath(), System.getProperty("file.encoding")));
LOG.info("load library not find in classPath ! i find it in " + find.getAbsolutePath() + " make sure it is your config!");
}
} catch (Exception e1) {
LOG.warn("not find library.properties. reason: " + e1.getMessage());
}
}
}
if (rb == null) {
LOG.warn("not find library.properties in classpath use it by default !");
} else {
for (String key : rb.keySet()) {
ENV.put(key, rb.getString(key));
try {
String value = rb.getString(key);
if (value.startsWith("jdbc:")) { //给jdbc窜中密码做一个加密,不让密码明文在日志中
value = Jdbc2Stream.encryption(value);
}
LOG.info("init " + key + " to env value is : " + value);
Field field = MyStaticValue.class.getField(key);
field.set(null, ObjConver.conversion(rb.getString(key), field.getType()));
} catch (Exception e) {
}
}
}
}
/**
* 人名词典
*
* @return
*/
public static BufferedReader getPersonReader() {
return DicReader.getReader("person/person.dic");
}
/**
* 机构名词典
*
* @return
*/
public static BufferedReader getCompanReader() {
return DicReader.getReader("company/company.data");
}
/**
* 机构名词典
*
* @return
*/
public static BufferedReader getNewWordReader() {
return DicReader.getReader("newWord/new_word_freq.dic");
}
/**
* 核心词典
*
* @return
*/
public static BufferedReader getArraysReader() {
return DicReader.getReader("arrays.dic");
}
/**
* 数字词典
*
* @return
*/
public static BufferedReader getNumberReader() {
return DicReader.getReader("numberLibrary.dic");
}
/**
* 英文词典
*
* @return
*/
public static BufferedReader getEnglishReader() {
return DicReader.getReader("englishLibrary.dic");
}
/**
* 词性表
*
* @return
*/
public static BufferedReader getNatureMapReader() {
return DicReader.getReader("nature/nature.map");
}
/**
* 词性关联表
*
* @return
*/
public static BufferedReader getNatureTableReader() {
return DicReader.getReader("nature/nature.table");
}
/**
* 得道姓名单字的词频词典
*
* @return
*/
public static BufferedReader getNatureClassSuffix() {
return DicReader.getReader("nature_class_suffix.txt");
}
/**
* 根据词语后缀判断词性
*
* @return
*/
public static BufferedReader getPersonFreqReader() {
return DicReader.getReader("person/name_freq.dic");
}
/**
* 名字词性对象反序列化
*
* @return
*/
@SuppressWarnings("unchecked")
public static Map<String, int[][]> getPersonFreqMap() {
Map<String, int[][]> map = new HashMap<String, int[][]>(0);
try (InputStream inputStream = DicReader.getInputStream("person/asian_name_freq.data")) {
ObjectInputStream objectInputStream = new ObjectInputStream(inputStream);
map = (Map<String, int[][]>) objectInputStream.readObject();
} catch (IOException e) {
LOG.warn("IO异常", e);
} catch (ClassNotFoundException e) {
LOG.warn("找不到类", e);
}
return map;
}
/**
* 词与词之间的关联表数据
*
* @return
*/
public static void initBigramTables() {
try (BufferedReader reader = IOUtil.getReader(DicReader.getInputStream("bigramdict.dic"), "UTF-8")) {
String temp = null;
String[] strs = null;
int freq = 0;
while ((temp = reader.readLine()) != null) {
if (StringUtil.isBlank(temp)) {
continue;
}
strs = temp.split("\t");
freq = Integer.parseInt(strs[1]);
strs = strs[0].split("@");
AnsjItem fromItem = DATDictionary.getItem(strs[0]);
AnsjItem toItem = DATDictionary.getItem(strs[1]);
if (fromItem == AnsjItem.NULL && strs[0].contains("#")) {
fromItem = AnsjItem.BEGIN;
}
if (toItem == AnsjItem.NULL && strs[1].contains("#")) {
toItem = AnsjItem.END;
}
if (fromItem == AnsjItem.NULL || toItem == AnsjItem.NULL) {
continue;
}
if (fromItem.bigramEntryMap == null) {
fromItem.bigramEntryMap = new HashMap<Integer, Integer>();
}
fromItem.bigramEntryMap.put(toItem.getIndex(), freq);
}
} catch (NumberFormatException e) {
LOG.warn("数字格式异常", e);
} catch (UnsupportedEncodingException e) {
LOG.warn("不支持的编码", e);
} catch (IOException e) {
LOG.warn("IO异常", e);
}
}
/*
* 外部引用为了实例化加载变量
*/
public static Log getLog(Class<?> clazz) {
return LogFactory.getLog(clazz);
}
/**
* 增加一个词典
*
* @param key
* @param path
* @param value
*/
public static void putLibrary(String key, String path, Object value) {
if (key.startsWith(DicLibrary.DEFAULT)) {
DicLibrary.put(key, path, (Forest) value);
} else if (key.startsWith(StopLibrary.DEFAULT)) {
StopLibrary.put(key, path, (StopRecognition) value);
} else if (key.startsWith(SynonymsLibrary.DEFAULT)) {
SynonymsLibrary.put(key, path, (SmartForest) value);
} else if (key.startsWith(AmbiguityLibrary.DEFAULT)) {
AmbiguityLibrary.put(key, path, (Forest) value);
} else if (key.startsWith(CrfLibrary.DEFAULT)) {
CrfLibrary.put(key, path, (SplitWord) value);
} else {
throw new LibraryException(key + " type err must start with dic,stop,ambiguity,synonyms");
}
ENV.put(key, path);
}
/**
* 懒加载一个词典
*
* @param key
* @param path
*/
public static void putLibrary(String key, String path) {
if (key.startsWith(DicLibrary.DEFAULT)) {
DicLibrary.put(key, path);
} else if (key.startsWith(StopLibrary.DEFAULT)) {
StopLibrary.put(key, path);
} else if (key.startsWith(SynonymsLibrary.DEFAULT)) {
SynonymsLibrary.put(key, path);
} else if (key.startsWith(AmbiguityLibrary.DEFAULT)) {
AmbiguityLibrary.put(key, path);
} else if (key.startsWith(CrfLibrary.DEFAULT)) {
CrfLibrary.put(key, path);
} else {
throw new LibraryException(key + " type err must start with dic,stop,ambiguity,synonyms");
}
ENV.put(key, path);
}
/**
* 删除一个词典
*
* @param key
*/
public static void removeLibrary(String key) {
if (key.startsWith(DicLibrary.DEFAULT)) {
DicLibrary.remove(key);
} else if (key.startsWith(StopLibrary.DEFAULT)) {
StopLibrary.remove(key);
} else if (key.startsWith(SynonymsLibrary.DEFAULT)) {
SynonymsLibrary.remove(key);
} else if (key.startsWith(AmbiguityLibrary.DEFAULT)) {
AmbiguityLibrary.remove(key);
} else if (key.startsWith(CrfLibrary.DEFAULT)) {
CrfLibrary.remove(key);
} else {
throw new LibraryException(key + " type err must start with dic,stop,ambiguity,synonyms");
}
ENV.remove(key);
}
/**
* 重置一个词典
*
* @param key
*/
public static void reloadLibrary(String key) {
if (key.startsWith(DicLibrary.DEFAULT)) {
DicLibrary.reload(key);
} else if (key.startsWith(StopLibrary.DEFAULT)) {
StopLibrary.reload(key);
} else if (key.startsWith(SynonymsLibrary.DEFAULT)) {
SynonymsLibrary.reload(key);
} else if (key.startsWith(AmbiguityLibrary.DEFAULT)) {
AmbiguityLibrary.reload(key);
} else if (key.startsWith(CrfLibrary.DEFAULT)) {
CrfLibrary.reload(key);
} else {
throw new LibraryException(key + " type err must start with dic,stop,ambiguity,synonyms");
}
}
}