package org.ansj.library;
import java.io.BufferedReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.ansj.dic.PathToStream;
import org.ansj.domain.KV;
import org.ansj.recognition.impl.StopRecognition;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.StringUtil;
import org.nlpcn.commons.lang.util.logging.Log;
import org.nlpcn.commons.lang.util.logging.LogFactory;
/**
 * Registry of named stop libraries ({@link StopRecognition} instances).
 *
 * <p>Each entry maps a key to a dictionary path plus a recognizer that is
 * loaded from that path on first access. At class-load time every key of
 * {@code MyStaticValue.ENV} that starts with {@link #DEFAULT} is registered,
 * and {@link #DEFAULT} itself falls back to {@code library/stop.dic} when it
 * was not configured.
 *
 * <p>Dictionary file format, as parsed by this class: one entry per non-blank
 * line, either {@code value} or {@code value<TAB>type}. A type of
 * {@code nature} registers a stop nature, {@code regex} registers a stop
 * regex, and any other type (or no type) registers a stop word.
 *
 * <p>NOTE(review): the registry is a plain {@link HashMap}; only loading and
 * the three-argument {@code putIfAbsent} are synchronized. Confirm whether
 * concurrent registration/removal has to be supported.
 */
public class StopLibrary {

    private static final Log LOG = LogFactory.getLog();

    /** Key of the default stop library; also the prefix used to pick stop entries out of the configuration. */
    public static final String DEFAULT = "stop";

    // key -> (dictionary path, recognizer); the recognizer stays null until the library is first loaded
    private static final Map<String, KV<String, StopRecognition>> STOP = new HashMap<>();

    static {
        for (Entry<String, String> entry : MyStaticValue.ENV.entrySet()) {
            if (entry.getKey().startsWith(DEFAULT)) {
                put(entry.getKey(), entry.getValue());
            }
        }
        putIfAbsent(DEFAULT, "library/stop.dic");
    }

    /**
     * Adds stop natures (part-of-speech filters) to the library registered under {@code key}.
     *
     * @param key           library key
     * @param filterNatures natures to pass to {@link StopRecognition#insertStopNatures}
     * @throws NullPointerException if {@link #get(String)} returns {@code null} for {@code key}
     */
    public static void insertStopNatures(String key, String... filterNatures) {
        StopRecognition fr = get(key);
        fr.insertStopNatures(filterNatures);
    }

    /**
     * Adds stop regular expressions to the library registered under {@code key}.
     *
     * @param key     library key
     * @param regexes regular expressions to pass to {@link StopRecognition#insertStopRegexes}
     * @throws NullPointerException if {@link #get(String)} returns {@code null} for {@code key}
     */
    public static void insertStopRegexes(String key, String... regexes) {
        StopRecognition fr = get(key);
        fr.insertStopRegexes(regexes);
    }

    /**
     * Adds stop words to the library registered under {@code key}.
     *
     * @param key       library key
     * @param stopWords words to pass to {@link StopRecognition#insertStopWords}
     * @throws NullPointerException if {@link #get(String)} returns {@code null} for {@code key}
     */
    public static void insertStopWords(String key, String... stopWords) {
        StopRecognition fr = get(key);
        fr.insertStopWords(stopWords);
    }

    /**
     * Adds a list of stop words to the library registered under {@code key}.
     *
     * @param key       library key
     * @param stopWords words to pass to {@link StopRecognition#insertStopWords}
     * @throws NullPointerException if {@link #get(String)} returns {@code null} for {@code key}
     */
    public static void insertStopWords(String key, List<String> stopWords) {
        StopRecognition fr = get(key);
        fr.insertStopWords(stopWords);
    }

    /**
     * Returns the default stop library, i.e. {@code get(DEFAULT)}.
     *
     * @return the default recognizer, or {@code null} if it could not be loaded
     */
    public static StopRecognition get() {
        return get(DEFAULT);
    }

    /**
     * Returns the stop library registered under {@code key}, loading it from
     * its dictionary path on first access.
     *
     * <p>If the key is not registered yet but is present in
     * {@code MyStaticValue.ENV}, it is registered from there first.
     *
     * @param key library key
     * @return the recognizer, or {@code null} when the key is unknown (a
     *         warning is logged) or loading the dictionary failed
     */
    public static StopRecognition get(String key) {
        KV<String, StopRecognition> kv = STOP.get(key);

        if (kv == null) {
            if (MyStaticValue.ENV.containsKey(key)) {
                // Register lazily from the configuration, then retry the lookup.
                putIfAbsent(key, MyStaticValue.ENV.get(key));
                return get(key);
            }
            LOG.warn("STOP " + key + " not found in config ");
            return null;
        }

        StopRecognition stopRecognition = kv.getV();
        if (stopRecognition == null) {
            stopRecognition = init(key, kv, false);
        }
        return stopRecognition;
    }

    /**
     * Loads (or reloads) the dictionary file of one registry entry.
     *
     * @param key    library key; the entry is removed from the registry if loading fails
     * @param kv     registry entry holding the dictionary path and, once loaded, the recognizer
     * @param reload when {@code true} an already loaded recognizer is cleared and
     *               refilled in place; when {@code false} it is returned untouched
     * @return the loaded recognizer, or {@code null} if reading the dictionary failed
     */
    private synchronized static StopRecognition init(String key, KV<String, StopRecognition> kv, boolean reload) {
        StopRecognition stopRecognition = kv.getV();

        if (stopRecognition != null) {
            if (reload) {
                stopRecognition.clear();
            } else {
                return stopRecognition;
            }
        } else {
            stopRecognition = new StopRecognition();
        }

        try {
            LOG.debug("begin init FILTER !");
            long start = System.currentTimeMillis();

            try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
                String line;
                while ((line = br.readLine()) != null) {
                    if (StringUtil.isNotBlank(line)) {
                        insertEntry(stopRecognition, StringUtil.trim(line));
                    }
                }
            }

            LOG.info("load stop use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
            // Publish the recognizer only after the whole file was read successfully.
            kv.setV(stopRecognition);
            return stopRecognition;
        } catch (Exception e) {
            LOG.error("Init Stop library error :" + e.getMessage() + ", path: " + kv.getK());
            // A library that cannot be loaded is dropped from the registry.
            STOP.remove(key);
            return null;
        }
    }

    /**
     * Parses one trimmed, non-blank dictionary line and registers it with the recognizer.
     *
     * @param stopRecognition recognizer to fill
     * @param line            {@code value} or {@code value<TAB>type}
     */
    private static void insertEntry(StopRecognition stopRecognition, String line) {
        String[] fields = line.split("\t");

        if (fields.length == 1) {
            stopRecognition.insertStopWords(fields[0]);
            return;
        }

        switch (fields[1]) {
        case "nature":
            stopRecognition.insertStopNatures(fields[0]);
            break;
        case "regex":
            stopRecognition.insertStopRegexes(fields[0]);
            break;
        default:
            // Unknown type markers are treated as plain stop words.
            stopRecognition.insertStopWords(fields[0]);
            break;
        }
    }

    /**
     * Registers (or replaces) a library and records its path in {@code MyStaticValue.ENV}.
     *
     * @param key             library key
     * @param path            dictionary path
     * @param stopRecognition recognizer to store; {@code null} means "load from {@code path} on first access"
     */
    public static void put(String key, String path, StopRecognition stopRecognition) {
        STOP.put(key, KV.with(path, stopRecognition));
        MyStaticValue.ENV.put(key, path);
    }

    /**
     * Registers a lazily loaded library under {@code key} unless the key is
     * already registered. Unlike {@link #put(String, String)} this does not
     * write to {@code MyStaticValue.ENV}.
     *
     * @param key  library key
     * @param path dictionary path
     */
    public static void putIfAbsent(String key, String path) {
        if (!STOP.containsKey(key)) {
            STOP.put(key, KV.with(path, (StopRecognition) null));
        }
    }

    /**
     * Registers (or replaces) a lazily loaded library; same as
     * {@code put(key, path, null)}.
     *
     * @param key  library key
     * @param path dictionary path
     */
    public static void put(String key, String path) {
        put(key, path, null);
    }

    /**
     * Registers the given recognizer unless a loaded recognizer already exists for {@code key}.
     *
     * @param key             library key
     * @param path            dictionary path
     * @param stopRecognition recognizer to register
     * @return the already loaded recognizer if there is one, otherwise {@code stopRecognition}
     */
    public static synchronized StopRecognition putIfAbsent(String key, String path, StopRecognition stopRecognition) {
        KV<String, StopRecognition> kv = STOP.get(key);
        if (kv != null && kv.getV() != null) {
            return kv.getV();
        }
        put(key, path, stopRecognition);
        return stopRecognition;
    }

    /**
     * Unregisters a library: clears its recognizer if one was loaded and
     * removes the key from both {@code MyStaticValue.ENV} and the registry.
     *
     * @param key library key
     * @return the removed registry entry, or {@code null} if the key was not registered
     */
    public static KV<String, StopRecognition> remove(String key) {
        KV<String, StopRecognition> kv = STOP.get(key);
        if (kv != null && kv.getV() != null) {
            kv.getV().clear();
        }
        MyStaticValue.ENV.remove(key);
        return STOP.remove(key);
    }

    /**
     * Returns the keys of all registered libraries.
     *
     * @return the key-set view of the internal registry map (not a copy)
     */
    public static Set<String> keys() {
        return STOP.keySet();
    }

    /**
     * Reloads the library registered under {@code key} from its dictionary file.
     *
     * <p>If the key is no longer present in {@code MyStaticValue.ENV} the
     * library is removed and nothing is loaded.
     *
     * @param key library key
     */
    public static void reload(String key) {
        if (!MyStaticValue.ENV.containsKey(key)) {
            // Not configured any more: drop the library. Returning here avoids
            // re-registering the key with a null path and a load that can only fail.
            remove(key);
            return;
        }
        putIfAbsent(key, MyStaticValue.ENV.get(key));
        KV<String, StopRecognition> kv = STOP.get(key);
        init(key, kv, true);
    }
}