package com.yc.nlp.sentiment;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.yc.nlp.Setup;
import com.yc.nlp.classification.Bayes;
import com.yc.nlp.normal.Normal;
import com.yc.nlp.pojo.ClassifyResult;
import com.yc.nlp.seg.InitSeg;
import com.yc.nlp.util.MemFile;
/**
 * Sentiment scorer backed by a {@link Bayes} classifier.
 *
 * <p>Text is segmented with the shared {@link InitSeg}, stop words are removed
 * with the shared {@link Normal}, and the remaining tokens are handed to the
 * classifier. Constructing an instance loads the bundled model from
 * {@value #DEFAULT_MODEL}.
 */
public class Sentiment {
    private static final Logger logger = LoggerFactory.getLogger(Sentiment.class);

    /** Location of the model loaded by the constructor. */
    private static final String DEFAULT_MODEL = "com/yc/nlp/sentiment/sentiment.marshal";
    /** Label attached to documents from the negative training file. */
    private static final String LABEL_NEG = "neg";
    /** Label attached to documents from the positive training file. */
    private static final String LABEL_POS = "pos";

    private final Bayes classifier;
    private final InitSeg seg;
    private final Normal normal;

    /**
     * Creates a scorer using the segmenter and normalizer provided by
     * {@link Setup}, then loads the default model.
     */
    public Sentiment() {
        logger.debug("initialize sentiment begin...");
        this.classifier = new Bayes();
        this.seg = Setup.getSeg();
        this.normal = Setup.getNormal();
        this.load(DEFAULT_MODEL);
        logger.debug("initialize sentiment end...");
    }

    /**
     * Saves the classifier state by delegating to {@link Bayes#save}.
     *
     * @param fname
     *            destination passed through to the classifier
     */
    public void save(String fname) {
        this.classifier.save(fname);
    }

    /**
     * Loads classifier state by delegating to {@link Bayes#load}.
     *
     * @param fname
     *            model location passed through to the classifier
     */
    public void load(String fname) {
        this.classifier.load(fname);
    }

    /**
     * Segments a document and filters stop words out of the result.
     *
     * @param doc
     *            raw text to tokenize
     * @return the segmented words after stop-word filtering
     */
    public List<String> handle(String doc) {
        return normal.filterStop(new ArrayList<String>(seg.seg(doc)));
    }

    /**
     * Trains the Bayes classifier from two files of example sentences.
     *
     * <p>Each training sample is an {@code Object[2]} holding the filtered word
     * list and its label ({@code "neg"} or {@code "pos"}); negative samples are
     * added before positive ones. A file that cannot be opened contributes no
     * samples and is reported with a warning.
     *
     * @param negFile
     *            file of negative examples
     * @param posFile
     *            file of positive examples
     */
    public void train(String negFile, String posFile) {
        List<String> negDocs = readDocs(negFile);
        List<String> posDocs = readDocs(posFile);
        List<Object[]> data = new ArrayList<Object[]>();
        appendLabelled(data, negDocs, LABEL_NEG);
        appendLabelled(data, posDocs, LABEL_POS);
        this.classifier.train(data);
    }

    /** Reads the sentences of one training file; returns an empty list if it cannot be opened. */
    private List<String> readDocs(String file) {
        List<String> docs = new ArrayList<String>();
        BufferedReader br = MemFile.readFile(file, this);
        if (br == null) {
            logger.warn("training file could not be opened, skipping: {}", file);
            return docs;
        }
        try {
            return MemFile.sentimentFile(br, docs);
        } finally {
            // Closing an already-closed reader has no effect, so this is safe
            // whether or not the helper closed it itself.
            try {
                br.close();
            } catch (IOException e) {
                logger.warn("failed to close training file: " + file, e);
            }
        }
    }

    /** Tokenizes every sentence and appends it to {@code data} as a {words, label} pair. */
    private void appendLabelled(List<Object[]> data, List<String> docs, String label) {
        for (String sent : docs) {
            data.add(new Object[] { this.handle(sent), label });
        }
    }

    /**
     * Classifies a sentence with the Bayes classifier.
     *
     * @param sent
     *            text to score
     * @return the classifier's probability when the predicted label is
     *         {@code "pos"}, otherwise one minus that probability
     */
    public double classify(String sent) {
        ClassifyResult result = this.classifier.classify(this.handle(sent));
        // Constant-first comparison: a missing label is treated as "not pos"
        // instead of raising a NullPointerException.
        if (LABEL_POS.equals(result.getRet())) {
            return result.getProb();
        }
        return 1 - result.getProb();
    }

    /**
     * Trains on {@code neg.txt} / {@code pos.txt}, saves the result to
     * {@code sentiment.marshal}, then loads the default model again.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        Sentiment sentiment = new Sentiment();
        sentiment.train("neg.txt", "pos.txt");
        sentiment.save("sentiment.marshal");
        // NOTE(review): this loads the default model path, not the file saved
        // just above; confirm that is intended.
        sentiment.load(DEFAULT_MODEL);
    }
}