package context.core.task.sentiment;

/*
 * Revised by Ming Jiang: now supports n-grams.
 */
import context.core.entity.CorpusData;
import context.core.entity.FileData;
import context.core.entity.TabularData;
import context.core.task.pos.POSTagger;
import context.core.util.CorpusAggregator;
import context.core.util.JavaIO;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Dictionary-based sentiment tagger. Matches corpus tokens (and multi-word
 * n-gram phrases) against a sentiment dictionary and produces a CSV of
 * term/POS/sentiment/frequency rows.
 *
 * @author Aale
 */
public class SentimentBody {

    private SentimentTaskInstance instance;
    private CorpusData input;
    private List<TabularData> tabularOutput;
    private FileData sentimentFile;
    private List<String> ToCSV;      // output for HTML rendering of sentiment
    private List<String> ToHumanCSV; // human-readable output displayed in ConText
    private StanfordCoreNLP pipeline;
    String SentimentString;

    /**
     * @param instance the task instance that supplies the corpus, the CoreNLP
     *                 pipeline, and the sentiment dictionary file
     */
    public SentimentBody(SentimentTaskInstance instance) {
        this.instance = instance;
        init();
    }

    private void init() {
        this.input = (CorpusData) instance.getInput();
        this.tabularOutput = instance.getTabularOutput();
        this.pipeline = instance.getPipeline();
        this.sentimentFile = instance.getSentimentFile();
    }

    /**
     * Runs the dictionary lookup over every file in the corpus.
     *
     * @return true on success, false if a document could not be processed
     * @throws IOException if the sentiment dictionary cannot be read
     */
    public boolean RunSentimentAnalysis() throws IOException {
        SentimentString = sentimentFile.readFileIntoString().toLowerCase();
        // Parse the dictionary CSV. Expected columns:
        // word, POS (may be empty), <unused>, polarity, subjectivity (optional).
        String[] SentimentWords = SentimentString.split("\\r?\\n"); // tolerate Windows line endings
        // Hash maps give fast lookup of each word's sentiment values:
        // word+POS entries, n-gram entries without POS, and single-word entries without POS.
        Map<SentimentObj, String[]> sentiMap1 = new HashMap<SentimentObj, String[]>();
        Map<String, String[]> sentiMap_nwords = new HashMap<String, String[]>();
        Map<String, String[]> sentiMap_swords = new HashMap<String, String[]>();
        List<String> Ngrams = new ArrayList<String>();
        for (String line : SentimentWords) {
            if (line.contains("Word,POS")) {
                continue; // skip the header row
            }
            String[] line_temp = line.split(",");
            if (line_temp.length < 4) {
                continue; // skip blank or malformed rows
            }
            String line_word = line_temp[0];
            String line_pol = line_temp[3];
            String line_pos = "NA";
            String line_subj = "NA";
            if (!line_temp[1].isEmpty()) {
                line_pos = line_temp[1];
            }
            if (line_temp.length == 5 && !line_temp[4].isEmpty()) {
                line_subj = line_temp[4];
            }
            String[] line_val = {line_pol, line_subj};
            if (!line_pos.equals("NA")) {
                // Entry constrained to a specific part of speech.
                String[] line_key = {line_word, line_pos};
                sentiMap1.put(new SentimentObj(line_key.clone()), line_val);
            } else if (line_word.contains(" ")) {
                // Multi-word entry: keyed with underscores so it survives tokenization.
                sentiMap_nwords.put(line_word.replace(" ", "_"), line_val);
                Ngrams.add(line_word);
            } else {
                sentiMap_swords.put(line_word, line_val); // single word, any POS
            }
        }

        ToCSV = new ArrayList<String>();
        ToHumanCSV = new ArrayList<String>();
        List<List<String[]>> toAggregate = new ArrayList<List<String[]>>();
        List<FileData> files = input.getFiles();
        try {
            for (FileData docData : files) {
                File documentFile = docData.getFile();
                List<String[]> SentimentTags = new ArrayList<String[]>();
                String documentString;
                try {
                    documentString = JavaIO.readFile(documentFile).toLowerCase();
                    // Join every known n-gram with underscores so the tokenizer keeps
                    // it as a single token. replace() is used instead of replaceAll()
                    // so regex metacharacters in phrases are treated literally.
                    for (int j = 0; j < Ngrams.size(); j++) {
                        if (documentString.contains(Ngrams.get(j))) {
                            documentString = documentString.replace(Ngrams.get(j), Ngrams.get(j).replace(" ", "_"));
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                    return false;
                }
                Annotation document = new Annotation(documentString);
                pipeline.annotate(document);
                List<CoreMap> sentences = document.get(SentencesAnnotation.class);
                List<String[]> docWords = new ArrayList<String[]>();
                for (CoreMap sentence : sentences) {
                    // A CoreLabel is a CoreMap with additional token-specific methods.
                    final List<CoreLabel> sent = sentence.get(TokensAnnotation.class);
                    final List<TaggedWord> taggedWords = POSTagger.tag(sent, "en");
                    for (TaggedWord token : taggedWords) {
                        String word = token.word();
                        // Drop punctuation-only tokens.
                        if (word.matches("\\W") && !word.matches("[0-9a-zA-Z.;:'\"]*")) {
                            continue;
                        }
                        if (word.matches("[;.'\"]")) {
                            continue;
                        }
                        // Map the Penn Treebank tag to the coarse POS used by the dictionary.
                        String pos = token.tag();
                        String[] wordPOS = new String[3];
                        wordPOS[0] = word;
                        if (pos.contains("JJ")) {
                            wordPOS[1] = "adj";
                        } else if (pos.contains("NN")) {
                            wordPOS[1] = "noun";
                        } else if (pos.contains("VB")) {
                            wordPOS[1] = "verb";
                        } else if (pos.contains("RB")) {
                            wordPOS[1] = "adverb";
                        } else {
                            wordPOS[1] = "anypos";
                        }
                        wordPOS[2] = pos;
                        docWords.add(wordPOS);
                    }
                }
                // Look each token up in the three dictionaries: first word+POS,
                // then single words without POS, then underscore-joined n-grams.
                // outputList holds HTML-colored tokens for this document (built
                // per document but not currently exported).
                ArrayList<String[]> outputList = new ArrayList<String[]>();
                for (String[] word_and_pos : docWords) {
                    String[] word_temp = {word_and_pos[0], word_and_pos[1]};
                    SentimentObj word_pos_obj = new SentimentObj(word_temp);
                    String polarity = null;
                    String term = word_and_pos[0];
                    if (sentiMap1.containsKey(word_pos_obj)) {
                        polarity = sentiMap1.get(word_pos_obj)[0];
                    } else if (sentiMap_swords.containsKey(word_and_pos[0])) {
                        polarity = sentiMap_swords.get(word_and_pos[0])[0];
                    } else if (sentiMap_nwords.containsKey(word_and_pos[0])) {
                        polarity = sentiMap_nwords.get(word_and_pos[0])[0];
                        term = word_and_pos[0].replace("_", " "); // restore the original phrase
                    }
                    if (polarity != null) {
                        String color = colorFor(polarity);
                        // term, Penn Treebank tag, polarity, initial count of 1
                        String[] sentiTag = {term, word_and_pos[2], polarity, "1"};
                        SentimentTags.add(sentiTag);
                        String[] tempArray = {"<span style=\"color:rgb" + color + "\">" + term + " </span>", color};
                        outputList.add(tempArray);
                    } else {
                        String[] tempArray = {word_and_pos[0], null};
                        outputList.add(tempArray);
                    }
                }
                if (SentimentTags.size() > 0) {
                    toAggregate.add(SentimentTags); // words from this document that carry sentiment
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        // Collate the per-document lists so each word/POS pair gets its total count.
        List<String[]> ToHumanCSVArray = new CorpusAggregator().CorpusAggregate(toAggregate);
        ToHumanCSV.add("Term,Part Of Speech,Sentiment,Frequency");
        for (String[] HumanArray : ToHumanCSVArray) {
            ToHumanCSV.add(HumanArray[0] + "," + HumanArray[1] + "," + HumanArray[2] + "," + HumanArray[3]);
        }
        return true;
    }

    /**
     * @param polarity the dictionary polarity value
     * @return an RGB triple for the HTML span: green for positive, blue for
     *         neutral, red for negative, black for anything else
     */
    private static String colorFor(String polarity) {
        if (polarity.equals("positive")) {
            return "(0,230,0)";
        }
        if (polarity.equals("neutral")) {
            return "(0,0,230)";
        }
        if (polarity.equals("negative")) {
            return "(230,0,0)";
        }
        return "(0,0,0)"; // valid default; the former "000000" produced invalid CSS
    }

    /**
     * @return the HTML-oriented output rows (not currently populated by
     *         RunSentimentAnalysis)
     */
    public List<String> GetCSV() {
        return ToCSV;
    }

    /**
     * @return the human-readable CSV rows displayed in ConText
     */
    public List<String> GetHumanCSV() {
        return ToHumanCSV;
    }

    /**
     * Writes the human-readable CSV to the given path.
     *
     * @param filepath destination file
     */
    public void writeOutput(String filepath) {
        writeCsv(GetHumanCSV(), filepath);
    }

    private void writeCsv(List<String> lines, String filepath) {
        StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            sb.append(line).append("\n");
        }
        // 2016.03: delete any existing file before writing.
        File toDelete = new File(filepath);
        if (toDelete.exists()) {
            toDelete.delete();
        }
        FileData.writeDataIntoFile(sb.toString(), filepath);
    }
}
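/*
 * Usage sketch (illustrative only): how a caller might drive this class.
 * The construction and configuration of SentimentTaskInstance are not shown
 * in this file, so "taskInstance" below is assumed to have been set up
 * elsewhere with a corpus, a StanfordCoreNLP pipeline, and a sentiment
 * dictionary; the output path is likewise hypothetical.
 *
 *   SentimentBody body = new SentimentBody(taskInstance);
 *   if (body.RunSentimentAnalysis()) {
 *       // Writes rows of "Term,Part Of Speech,Sentiment,Frequency".
 *       body.writeOutput("sentiment_summary.csv");
 *   }
 */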