/**
*
*/
package context.core.task.wordcloud;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Formatter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
import org.apache.commons.io.FileUtils;
import cc.mallet.types.IDSorter;
import au.com.bytecode.opencsv.CSVReader;
import context.app.AppConfig;
import context.core.entity.CorpusData;
import context.core.entity.FileData;
import context.core.task.topicmodeling.MalletTopicModeling;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
/**
 * Builds a topic-clustered word cloud for a corpus.
 *
 * <p>{@link #genCloud()} runs {@link MalletTopicModeling} over the input files and
 * collects the top words of every topic, tagging each word with a sentiment code
 * looked up in a CSV lexicon. {@link #writeOutput(String)} then serialises those
 * words as a JavaScript array ({@code word_list.js}) and copies the D3 word-cloud
 * assets next to it.
 *
 * @author Shubhanshu
 */
public class ClusteredWordCloud {

    /** Sentiment code returned for words that are missing from the lexicon or have an unrecognised polarity. */
    private static final int UNKNOWN_SENTIMENT = 2;

    /** Static assets copied verbatim from the application data directory into the output directory. */
    private static final String[] D3_ASSETS = {
        "d3.v3.min.js",
        "d3.layout.cloud.js",
        "word_cloud.js",
        "wordCloud.html"
    };

    // Configuration copied from the task instance in init().
    // NOTE: toAggregate, pipeline, WordWeights, isCluster, isTff and topicData are
    // never read inside this class; they are kept so the class layout is unchanged.
    private List<List<String[]>> toAggregate;
    private String stopListLoc;
    private StanfordCoreNLP pipeline;
    private List<String[]> WordWeights;
    private String sentimentLoc;
    private Boolean isCluster;
    private int numIters;
    private int numTopics;
    private int wordPerTopic;
    public int numOptInterval;
    public double sumAlpha;
    private int width;
    private int height;
    private int minFontSize;
    private WordCloudTaskInstance instance;
    private CorpusData input;
    private boolean isTff;
    private boolean isLowercase;

    /**
     * One word of the cloud together with the topic it belongs to, its sentiment code,
     * its weight inside the topic and the weight ("fit value") of the topic itself.
     */
    public class SentimentTopicWord {
        String word;
        int sentiment, topic;
        double weight, fitVal;

        /**
         * Creates a word whose numeric attributes keep their Java defaults (0 / 0.0).
         *
         * @param word the word text
         */
        public SentimentTopicWord(String word) {
            this.word = word;
        }

        /**
         * Creates a fully populated word.
         *
         * @param word      the word text
         * @param sentiment sentiment code (1, 0, -1, or 2 for unknown)
         * @param topic     topic index the word belongs to
         * @param weight    weight of the word inside its topic
         * @param fitVal    weight of the topic
         */
        public SentimentTopicWord(String word, int sentiment,
                int topic, double weight, double fitVal) {
            this.word = word;
            this.sentiment = sentiment;
            this.topic = topic;
            this.weight = weight;
            this.fitVal = fitVal;
        }

        /**
         * Renders the word as one JavaScript object literal followed by a comma, e.g.
         * {@code {text: "good",topic:0,sentiment:1,frequency:12.000,fitVal:0.2500},}.
         * Numbers are formatted with {@link Locale#US} so the decimal separator is
         * always a dot, and the word is escaped so that quotes or backslashes in it
         * cannot break the generated script.
         */
        @Override
        public String toString() {
            return String.format(Locale.US,
                    "{text: \"%s\",topic:%d,sentiment:%d,frequency:%.3f,fitVal:%.4f},",
                    escapeForJsString(word), topic, sentiment, weight, fitVal);
        }
    }

    /** Words produced by the last {@link #genCloud()} call; empty until then. */
    private List<SentimentTopicWord> wordList = new ArrayList<SentimentTopicWord>();

    /**
     * Sentiment lexicon: word -> [POS, Stemmed, Priorpolarity].
     * Kept per instance because the key casing depends on this instance's
     * {@code isLowercase} setting.
     */
    private HashMap<String, List<String>> sentimentDict = new HashMap<String, List<String>>();

    /**
     * Escapes a string for use inside a double-quoted JavaScript string literal.
     *
     * @param text raw text, may be {@code null}
     * @return the escaped text, or {@code null} if {@code text} was {@code null}
     */
    private static String escapeForJsString(String text) {
        if (text == null) {
            return null;
        }
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '"':
                escaped.append("\\\"");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            default:
                escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Loads the sentiment lexicon from the CSV file at {@code sentimentLoc}.
     *
     * <p>Row format: {@code Word,POS,Stemmed,Priorpolarity,Type}. The first row is
     * treated as a header and skipped. Rows with fewer than four columns are ignored.
     * When {@code isLowercase} is set the word column is lower-cased before being used
     * as the key. I/O errors are printed and leave the lexicon with whatever rows were
     * read before the failure (possibly none), so lookups then yield "unknown".
     */
    private void initSentimentDict() {
        HashMap<String, List<String>> dict = new HashMap<String, List<String>>();
        CSVReader reader = null;
        try {
            reader = new CSVReader(new FileReader(sentimentLoc));
            reader.readNext(); // skip the header row
            String[] row;
            while ((row = reader.readNext()) != null) {
                if (row.length < 4) {
                    // Blank or truncated row: subList(1, 4) would throw.
                    continue;
                }
                String key = this.isLowercase ? row[0].toLowerCase() : row[0];
                dict.put(key, Arrays.asList(row).subList(1, 4));
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }
        sentimentDict = dict;
    }

    /**
     * Maps a word to its sentiment code using the loaded lexicon.
     *
     * @param word the word to look up (must already have the lexicon's casing)
     * @return 1 for "positive", 0 for "neutral", -1 for "negative", and 2 when the
     *         word is not in the lexicon or its polarity is none of those values
     */
    private int getSentiment(String word) {
        List<String> entry = sentimentDict.get(word);
        if (entry == null) {
            return UNKNOWN_SENTIMENT;
        }
        // Index 2 of the stored [POS, Stemmed, Priorpolarity] triple is the polarity.
        switch (entry.get(2)) {
        case "positive":
            return 1;
        case "neutral":
            return 0;
        case "negative":
            return -1;
        default:
            return UNKNOWN_SENTIMENT;
        }
    }

    /** Pairs a topic id with its fit value. */
    private class TopicStruct {
        int id;
        double fitVal;
    }

    private TopicStruct[] topicData;

    /**
     * Finds the position of the entry with the given topic id.
     *
     * @param topicData entries to search
     * @param id        topic id to look for
     * @return index of the first matching entry, or -1 if none matches
     */
    public int getIndexForId(TopicStruct[] topicData, int id) {
        for (int i = 0; i < topicData.length; i++) {
            if (topicData[i].id == id) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the fit value of the entry with the given topic id.
     *
     * @param topicData entries to search
     * @param id        topic id to look for
     * @return fit value of the first matching entry, or 0.0 if none matches
     */
    public double getFitValForId(TopicStruct[] topicData, int id) {
        for (int i = 0; i < topicData.length; i++) {
            if (topicData[i].id == id) {
                return topicData[i].fitVal;
            }
        }
        return 0.0;
    }

    /**
     * Creates a word-cloud generator configured from the given task instance and
     * loads the sentiment lexicon it points to.
     *
     * @param instance task configuration and input corpus
     */
    public ClusteredWordCloud(WordCloudTaskInstance instance) {
        this.instance = instance;
        init();
    }

    /** Copies all settings out of the task instance, then loads the sentiment lexicon. */
    private void init() {
        this.input = (CorpusData) instance.getInput();
        this.toAggregate = new ArrayList<List<String[]>>();
        this.WordWeights = new ArrayList<String[]>();
        this.stopListLoc = instance.getStopListLoc();
        this.pipeline = instance.getPipeline();
        this.sentimentLoc = instance.getSentimentLoc();
        this.numIters = instance.getNumIters();
        this.numTopics = instance.getNumTopics();
        this.wordPerTopic = instance.getWordPerTopic();
        this.numOptInterval = instance.getNumOptInterval();
        this.sumAlpha = instance.getSumAlpha();
        this.width = instance.getWidth();
        this.height = instance.getHeight();
        this.minFontSize = instance.getMinFontSize();
        // isLowercase must be known BEFORE the lexicon is loaded, because the loader
        // uses it to decide whether to lower-case the dictionary keys.
        this.isLowercase = instance.getIsLowercase();
        this.initSentimentDict();
    }

    /**
     * Runs topic modeling on the input files and fills the internal word list with up
     * to {@code wordPerTopic} top words for each of the {@code numTopics} topics.
     *
     * <p>Each word's {@code topic} is the position of its topic in
     * {@code mtm.sortedTopics} (not the model's own topic id), and its {@code fitVal}
     * is that topic's weight.
     *
     * @return {@code true} on success; {@code false} if any exception occurred (the
     *         exception is printed to standard error)
     */
    public boolean genCloud() {
        List<FileData> files = input.getFiles();
        try {
            MalletTopicModeling mtm = new MalletTopicModeling(numTopics, wordPerTopic,
                    numIters, numOptInterval, sumAlpha, files, stopListLoc, isLowercase);
            ArrayList<TreeSet<IDSorter>> topicSortedWords = mtm.model.getSortedWords();
            wordList = new ArrayList<ClusteredWordCloud.SentimentTopicWord>();
            for (int i = 0; i < numTopics; i++) {
                IDSorter topic = mtm.sortedTopics[i];
                Iterator<IDSorter> iterator = topicSortedWords.get(topic.getID()).iterator();
                int rank = 0;
                // Take the first wordPerTopic entries of this topic's sorted word set.
                while (iterator.hasNext() && rank < wordPerTopic) {
                    IDSorter idCountPair = iterator.next();
                    SentimentTopicWord w = new SentimentTopicWord(
                            (String) mtm.dataAlphabet.lookupObject(idCountPair.getID()));
                    w.weight = idCountPair.getWeight();
                    w.topic = i;
                    w.fitVal = topic.getWeight();
                    w.sentiment = getSentiment(w.word);
                    wordList.add(w);
                    rank++;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("Error in generating word cloud:" + e.getMessage());
            return false;
        }
        return true;
    }

    /**
     * Writes the word list and the D3 assets into {@code <outputDirectory>/WordCloud}.
     *
     * @param outputDirectory directory under which the {@code WordCloud} folder is created
     * @return {@code true} if every file was written, {@code false} otherwise
     */
    public boolean writeOutput(String outputDirectory) {
        return this.writeJavaScript(getJavaScript(), outputDirectory);
    }

    /**
     * Builds the content of {@code word_list.js}: a few layout variables followed by
     * the {@code wordList} array with one object literal per collected word.
     *
     * @return the JavaScript source text
     */
    private String getJavaScript() {
        String lineSep = System.getProperty("line.separator");
        StringBuilder script = new StringBuilder();
        script.append("var item_count = ").append(numTopics).append(";").append(lineSep)
                .append("var word_per_item = ").append(wordPerTopic).append(";").append(lineSep)
                .append("var width = ").append(width).append(",").append(lineSep)
                .append(" height = ").append(height).append(",").append(lineSep)
                .append("\tfontSize = ").append(minFontSize).append(";").append(lineSep)
                .append(lineSep).append(" var wordList = [").append(lineSep);
        if (wordList != null) {
            for (SentimentTopicWord w : wordList) {
                script.append("\t").append(w.toString()).append(lineSep);
            }
        }
        script.append("];");
        return script.toString();
    }

    /**
     * Copies the D3 word-cloud assets from the application data directory into
     * {@code <outputDirectory>/WordCloud} and writes the generated script there as
     * {@code word_list.js}.
     *
     * @param javascriptForHTML content of {@code word_list.js}
     * @param outputDirectory   parent directory of the {@code WordCloud} folder
     * @return {@code true} if the directory exists (or was created), all assets were
     *         copied and the script was written; {@code false} on any failure
     */
    private boolean writeJavaScript(String javascriptForHTML, String outputDirectory) {
        File dirFile = new File(outputDirectory + "/WordCloud");
        // Re-check isDirectory() after mkdirs() so a concurrent creation is not an error.
        if (!dirFile.isDirectory() && !dirFile.mkdirs() && !dirFile.isDirectory()) {
            System.err.println("Could not create output directory: " + dirFile.getAbsolutePath());
            return false;
        }
        String outDir = dirFile.getAbsolutePath();
        String assetDir = AppConfig.getUserDirLoc() + "/data/WordClouds/word_cloud";
        try {
            for (String asset : D3_ASSETS) {
                FileUtils.copyFile(new File(assetDir + "/" + asset), new File(outDir + "/" + asset));
            }
        } catch (IOException e2) {
            e2.printStackTrace();
            return false;
        }
        File file = new File(outDir + "/word_list.js");
        // FileWriter creates the file if needed; try-with-resources closes the writer
        // even when the write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file.getAbsoluteFile()))) {
            bw.write(javascriptForHTML);
        } catch (IOException e) {
            e.printStackTrace(System.out);
            return false;
        }
        return true;
    }
}