package context.core.task.sentiment;
/**
 * Revised by Ming Jiang: now supports n-grams as well as single words.
 */
import context.core.entity.CorpusData;
import context.core.entity.FileData;
import context.core.entity.TabularData;
import context.core.task.pos.POSTagger;
import context.core.util.CorpusAggregator;
import context.core.util.JavaIO;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Dictionary-based sentiment tagger: each token (and each n-gram listed in the
 * sentiment dictionary) of the input corpus is looked up in the lexicon, and the
 * matched terms are aggregated into term / part-of-speech / polarity / frequency counts.
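 *
 * <p>Illustrative usage (a minimal sketch; assumes a {@code SentimentTaskInstance}
 * already configured with the input corpus, the Stanford CoreNLP pipeline, and the
 * sentiment dictionary file):</p>
 * <pre>{@code
 * SentimentBody body = new SentimentBody(taskInstance);
 * if (body.RunSentimentAnalysis()) {
 *     body.writeOutput("output/sentiment.csv"); // output path is illustrative
 * }
 * }</pre>
 *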
* @author Aale
*/
public class SentimentBody {
private SentimentTaskInstance instance;
private CorpusData input;
private List<TabularData> tabularOutput;
private FileData sentimentFile;
private String log;
private ArrayList<Formatter> allOuts;
private List<String> ToCSV;// output for html parsing of sentiment
private List<String> ToHumanCSV;// output for human readability to be displayed in ConText
private StanfordCoreNLP pipeline;
    String SentimentString; // raw contents of the sentiment dictionary file
    /**
     * @param instance the task instance that supplies the input corpus, the CoreNLP
     *                 pipeline, and the sentiment dictionary file
     */
    public SentimentBody(SentimentTaskInstance instance) {
        this.instance = instance;
        init();
    }
private void init() {
this.input = (CorpusData) instance.getInput();
this.tabularOutput = instance.getTabularOutput();
this.pipeline = instance.getPipeline();
this.sentimentFile = instance.getSentimentFile();
}
    /**
     * Runs the dictionary look-up over every file in the input corpus and builds
     * the aggregated CSV output.
     *
     * @return true on success, false if a document could not be processed
     * @throws IOException if the sentiment dictionary file cannot be read
     */
public boolean RunSentimentAnalysis() throws IOException {
SentimentString = sentimentFile.readFileIntoString();
SentimentString = SentimentString.toLowerCase();
        // Parse the sentiment dictionary CSV into look-up maps.
        String[] SentimentWords = SentimentString.split("\\r?\\n"); // tolerate Windows line endings
        Map<SentimentObj, String[]> sentiMap1 = new HashMap<SentimentObj, String[]>(); // keyed by (word, POS)
Map<String, String[]> sentiMap_nwords = new HashMap<String, String[]>();
Map<String, String[]> sentiMap_swords = new HashMap<String, String[]>();
List<String> Ngrams = new ArrayList<String>();
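        // Assumed dictionary line layout (inferred from the indexing below):
        //   column 0 = term, column 1 = POS tag (may be empty),
        //   column 3 = polarity, optional column 4 = subjectivity.
        // Illustrative line: "happy,adj,,positive,strongsubj"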
for (String line : SentimentWords) {
if (line.contains("Word,POS")) {
continue;
}
            String[] line_temp = line.split(",");
            if (line_temp.length < 4) {
                continue; // skip blank or malformed dictionary lines
            }
            String line_word = line_temp[0];
            String line_pol = line_temp[3];
String line_pos = "NA";
String line_subj = "NA";
            if (!line_temp[1].isEmpty()) {
                line_pos = line_temp[1];
            }
            if (line_temp.length == 5 && !line_temp[4].isEmpty()) {
                line_subj = line_temp[4];
            }
String[] line_val = {line_pol, line_subj};
            if (!line_pos.equals("NA")) {
String[] line_key = {line_word, line_pos};
SentimentObj SObj = new SentimentObj(line_key.clone());
sentiMap1.put(SObj, line_val);
} else if (line_word.contains(" ")) {
String temp = line_word.replaceAll(" ", "_");
                sentiMap_nwords.put(temp, line_val); // n-grams with no POS information, keyed with "_" joining the words
Ngrams.add(line_word);
} else {
                sentiMap_swords.put(line_word, line_val); // single words with no POS information
}
        } // using hash maps to store the sentiment values of the words for faster look-up
ToCSV = new ArrayList<String>();
ToHumanCSV = new ArrayList<String>();
List<List<String[]>> toAggregate = new ArrayList<List<String[]>>();
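        // toAggregate holds, per document, the rows {term, POS tag, polarity, count=1}
        // found in that document; CorpusAggregator later sums these counts across the corpus.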
List<FileData> files = input.getFiles();
try {
for (FileData docData : files) {
File documentFile = docData.getFile();
List<String[]> SentimentTags = new ArrayList<String[]>();
String documentString = null;
try {
documentString = JavaIO.readFile(documentFile);
documentString = documentString.toLowerCase();
                    // Join every known n-gram with "_" so that CoreNLP treats it as a single token.
                    for (int j = 0; j < Ngrams.size(); j++) {
                        if (documentString.contains(Ngrams.get(j))) {
                            String temp = Ngrams.get(j).replace(" ", "_");
                            // Use replace() rather than replaceAll(): the phrase is a literal, not a regex.
                            documentString = documentString.replace(Ngrams.get(j), temp);
                        }
                    }
} catch (IOException e) {
e.printStackTrace();
return false;
}
Annotation document = new Annotation(documentString);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
List<String[]> docWords = new ArrayList<String[]>();
                String[] wordPOS = new String[3]; // {token, coarse POS for dictionary look-up, original Penn Treebank tag}
for (CoreMap sentence : sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
final List<CoreLabel> sent = sentence.get(TokensAnnotation.class);
final List<TaggedWord> taggedWords = POSTagger.tag(sent, "en");
                    for (TaggedWord token : taggedWords) {
                        // the surface text of the token
                        String word = token.word();
                        // skip single non-word characters that are not digits, letters or common punctuation
                        if (word.matches("\\W")) {
                            if (!word.matches("[0-9a-zA-Z.;:\'\"]*")) {
                                continue;
                            }
                        }
                        // skip bare punctuation tokens
                        if (word.matches("[;.\'\"]")) {
                            continue;
                        }
                        // the Penn Treebank POS tag of the token
                        String pos = token.tag();
wordPOS[0] = word;
if (pos.contains("JJ")) {
wordPOS[1] = "adj";
} else if (pos.contains("NN")) {
wordPOS[1] = "noun";
} else if (pos.contains("VB")) {
wordPOS[1] = "verb";
} else if (pos.contains("RB")) {
wordPOS[1] = "adverb";
} else {
wordPOS[1] = "anypos";
}
wordPOS[2] = pos;
docWords.add(wordPOS.clone());
}
}
ArrayList<String[]> outputList = new ArrayList<String[]>();
                for (String[] word_and_pos : docWords) {
                    String[] word_temp = {word_and_pos[0], word_and_pos[1]};
                    SentimentObj word_pos_obj = new SentimentObj(word_temp);
                    String polarity = null;
                    String term = word_and_pos[0];
                    // Look the token up by (word, POS) first, then by word alone, then among n-grams.
                    if (sentiMap1.containsKey(word_pos_obj)) {
                        polarity = sentiMap1.get(word_pos_obj)[0];
                    } else if (sentiMap_swords.containsKey(word_temp[0])) {
                        polarity = sentiMap_swords.get(word_temp[0])[0];
                    } else if (sentiMap_nwords.containsKey(word_temp[0])) {
                        polarity = sentiMap_nwords.get(word_temp[0])[0];
                        term = word_and_pos[0].replaceAll("_", " "); // restore the spaces of a matched n-gram
                    }
                    if (polarity != null) {
                        // RGB colours for the HTML output: green = positive, blue = neutral, red = negative.
                        String color = "000000";
                        if (polarity.equals("positive")) {
                            color = "(0,230,0)";
                        } else if (polarity.equals("neutral")) {
                            color = "(0,0,230)";
                        } else if (polarity.equals("negative")) {
                            color = "(230,0,0)";
                        }
                        String[] sentiTag = {term, word_and_pos[2], polarity, Integer.toString(1)};
                        SentimentTags.add(sentiTag.clone());
                        String[] tempArray = {"<span style=\"color:rgb" + color + "\">" + term + " </span>", color};
                        outputList.add(tempArray);
                    } else {
                        String[] tempArray = {word_and_pos[0], null};
                        outputList.add(tempArray);
                    }
                }
                if (SentimentTags.size() > 0) {
                    toAggregate.add(SentimentTags); // add this document's list of sentiment-bearing terms
}
}
} catch (Exception e) {
e.printStackTrace();
return false;
}
        // Collate the per-document lists so that each (term, POS, polarity) row carries its corpus-wide frequency.
        List<String[]> ToHumanCSVArray = new CorpusAggregator().CorpusAggregate(toAggregate);
ToHumanCSV.add("Term,Part Of Speech,Sentiment,Frequency");
for (String[] HumanArray : ToHumanCSVArray) {
String HumanTemp = HumanArray[0] + "," + HumanArray[1] + "," + HumanArray[2] + "," + HumanArray[3];
ToHumanCSV.add(HumanTemp);
}
return true;
}
    /**
     * @return the CSV lines intended for the HTML rendering of the sentiment output
     */
public List<String> GetCSV() {
return ToCSV;
}
    /**
     * @return the human-readable CSV lines (term, part of speech, sentiment, frequency)
     */
public List<String> GetHumanCSV() {
return ToHumanCSV;
}
    /**
     * Writes the human-readable CSV to the given path, replacing any existing file.
     *
     * @param filepath destination path of the CSV file
     */
public void writeOutput(String filepath) {
this.writeCsv(GetHumanCSV(), filepath);
}
    private void writeCsv(List<String> lines, String filepath) {
        StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            sb.append(line).append("\n");
        }
        // Delete any existing output file before writing (added 2016.03).
        File toDelete = new File(filepath);
        if (toDelete.exists()) {
            toDelete.delete();
        }
        FileData.writeDataIntoFile(sb.toString(), filepath);
    }
}