package com.mite8.utils.off_line_util;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import org.ansj.recognition.impl.FilterRecognition;
/**
* Author: blogchong
* Time: 2016/10/9.
* Email: blogchong#qq.com
* 公众号:数据虫巢 ID:blogchong
* Desc: Loads stop-word dictionaries and IDF base data from classpath resources.
*/
public class LoadStopWordDic {

    /**
     * Charset used to decode every resource file. It is fixed explicitly so the
     * loaded words do not depend on the platform default encoding.
     */
    private static final String RESOURCE_CHARSET = "UTF-8";

    /**
     * Loads a stop-word dictionary and returns its lines as a list.
     *
     * <p>Every line of the resource becomes one list element, in file order;
     * lines are not trimmed or filtered.
     *
     * @param path classpath-relative name of the stop-word resource
     * @return a new mutable list holding one entry per line; contains only the
     *         lines read so far if an I/O error interrupts reading
     * @throws NullPointerException if the resource cannot be found on the classpath
     */
    public List<String> loadStopWordDic(String path) {
        return readResourceLines(path);
    }

    /**
     * Inserts every line of a stop-word resource into the given filter, one by one.
     *
     * @param filter the filter that receives each stop word via {@code insertStopWord}
     * @param path   classpath-relative name of the stop-word resource
     * @return the same {@code filter} instance that was passed in
     * @throws NullPointerException if the resource cannot be found on the classpath
     * @throws Exception declared for backward compatibility with existing callers
     */
    public FilterRecognition insertStopWords(FilterRecognition filter, String path) throws Exception {
        for (String stopWord : readResourceLines(path)) {
            filter.insertStopWord(stopWord);
        }
        return filter;
    }

    /**
     * Loads the IDF base data: a tab-separated resource of {@code word<TAB>integer} lines.
     *
     * <p>Lines that do not split into exactly two tab-separated fields, or whose
     * second field is not a valid integer, are skipped.
     *
     * @param idfPath classpath-relative name of the IDF resource
     * @return a new mutable map from word to its integer value; when a word occurs
     *         more than once the last occurrence wins
     * @throws NullPointerException if the resource cannot be found on the classpath
     */
    public Map<String, Integer> loadIdfFile(String idfPath) {
        Map<String, Integer> idfMap = new HashMap<String, Integer>();
        for (String line : readResourceLines(idfPath)) {
            String[] fields = line.split("\t");
            if (fields.length != 2) {
                continue;
            }
            try {
                // Trim so stray spaces around the number do not invalidate the line.
                idfMap.put(fields[0], Integer.parseInt(fields[1].trim()));
            } catch (NumberFormatException e) {
                // One malformed line should not abort the whole load; it is skipped
                // just like lines with the wrong number of columns.
                System.err.println("Skipping IDF line with non-integer value in " + idfPath + ": " + line);
            }
        }
        return idfMap;
    }

    /**
     * Reads all lines of a classpath resource, decoded as UTF-8.
     *
     * <p>The resource is opened as a stream rather than converted to a
     * {@link File}, so it can also be read when it is packaged inside a JAR or
     * when its location contains characters that are percent-encoded in URLs.
     */
    private List<String> readResourceLines(String path) {
        List<String> lines = new ArrayList<String>();
        InputStream resource = getClass().getClassLoader().getResourceAsStream(path);
        // A missing resource raised NullPointerException before; keep the type but say what is missing.
        Objects.requireNonNull(resource, "Classpath resource not found: " + path);
        try (InputStream in = resource; Scanner scanner = new Scanner(in, RESOURCE_CHARSET)) {
            while (scanner.hasNextLine()) {
                lines.add(scanner.nextLine());
            }
            // Scanner hides read failures behind hasNextLine() == false; surface them here.
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        } catch (IOException e) {
            // Best-effort: report the failure and return whatever was read so far.
            e.printStackTrace();
        }
        return lines;
    }
}