package edu.usc.cssl.tacit.cluster.hierarchical.services;
import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
import javax.imageio.ImageIO;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import org.eclipse.core.runtime.OperationCanceledException;
import org.eclipse.core.runtime.SubProgressMonitor;
import weka.clusterers.HierarchicalClusterer;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;
import weka.gui.hierarchyvisualizer.HierarchyVisualizer;
import edu.usc.cssl.tacit.common.TacitUtility;
import edu.usc.cssl.tacit.common.ui.views.ConsoleView;
public class HierarchicalClusterAnalysis {
public static String doClustering(List<File> inputFiles, String outputPath,
boolean saveImg, SubProgressMonitor subProgressMonitor, Date dateObj) {
try {
DateFormat df = new SimpleDateFormat("MM-dd-yy-HH-mm-ss");
StringToWordVector filter = new StringToWordVector();
HierarchicalClusterer aggHierarchical = new HierarchicalClusterer();
FastVector atts = new FastVector(1);
atts.addElement(new Attribute("text", (FastVector) null));
Instances docs = new Instances("text_files", atts, 0);
ConsoleView.printlInConsoleln(outputPath);
for (int i = 0; i < inputFiles.size(); i++) {
try {
double[] newInst = new double[1];
String content = new Scanner(inputFiles.get(i))
.useDelimiter("\\Z").next();
newInst[0] = (double) docs.attribute(0).addStringValue(
content);
docs.add(new Instance(1.0, newInst));
} catch (Exception e) {
ConsoleView.printlInConsoleln("Exception occurred in reading files"
+ e);
}
}
filter.setInputFormat(docs);
Instances filteredData = Filter.useFilter(docs, filter);
aggHierarchical.setNumClusters(1);
aggHierarchical.setPrintNewick(true);
subProgressMonitor.subTask("Building cluster");
aggHierarchical.buildClusterer(filteredData);
subProgressMonitor.worked(20);
String g = aggHierarchical.graph();
String output = formatGraph(g, inputFiles);
ConsoleView.printlInConsoleln("Network " + output);
subProgressMonitor.subTask("Formating Image");
aggHierarchical.linkTypeTipText();
subProgressMonitor.worked(15);
HierarchyVisualizer tv = new HierarchyVisualizer(output);
tv.setSize(1024, 1024);
JFrame f;
f = new JFrame();
JPanel container = new JPanel();
JScrollPane scrPane = new JScrollPane(container);
Container contentPane = f.getContentPane();
contentPane.setLayout(new BorderLayout());
f.getContentPane().add(scrPane);
contentPane.add(tv, BorderLayout.CENTER);
f.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
f.setSize(1024, 1024);
f.setVisible(true);
tv.fitToScreen();
if (saveImg) {
try {
BufferedImage image = new BufferedImage(
contentPane.getWidth(), contentPane.getHeight(),
BufferedImage.TYPE_INT_RGB);
Graphics2D graphics2D = image.createGraphics();
contentPane.printAll(graphics2D);
graphics2D.dispose();
subProgressMonitor.subTask("Saving image @ " + outputPath
+ File.separator
+ "Hierarchical Clustering Output "+df.format(dateObj)+".jpeg");
ImageIO.write(image, "jpeg", new File(outputPath
+ File.separator
+ "Hierarchical Clustering Output "+df.format(dateObj)+".jpeg"));
subProgressMonitor.worked(10);
} catch (Exception e) {
System.out
.println("Exception occurred in saving image of output "
+ e);
}
}
BufferedWriter buf = new BufferedWriter(new FileWriter(new File(
outputPath + File.separator + "hierarchical-cluster-"+df.format(dateObj)+".txt")));
buf.write("Mapping of document ID to actual names\n");
for (int i = 0; i < inputFiles.size(); i++) {
buf.write((i + 1) + " " + inputFiles.get(i).getName() + "\n");
}
buf.write(output);
buf.close();
subProgressMonitor.done();
return output;
} catch (Exception e) {
System.out
.println("Exception occurred in Hierarchical Clustering "
+ e);
}
return null;
}
public static String formatGraph(String graph, List<File> files) {
StringBuffer fgraph = new StringBuffer();
String input = graph.substring(7);
int i = 0, len = input.length();
char c;
int count = 0;
fgraph.append(graph.substring(0, 7));
ConsoleView.printlInConsoleln(graph);
while (i < len) {
c = input.charAt(i);
if (c == '(') {
fgraph.append(input.charAt(i++));
} else if (c == ':') {
if (input.charAt(i - 1) != ')') {
fgraph.append(++count);
}
while (i < len
&& (input.charAt(i) != ',' && input.charAt(i) != '(')) {
fgraph.append(input.charAt(i++));
// ConsoleView.writeInConsole(fgraph.toString());
}
if (i < len)
fgraph.append(input.charAt(i++));
} else {
i++;
}
}
ConsoleView.printlInConsoleln(fgraph.toString());
return fgraph.toString();
}
public static String runClustering(List<File> listOfFiles,
String fOutputDir, boolean fSaveImg,
SubProgressMonitor subProgressMonitor, Date dateObj) {
DateFormat df = new SimpleDateFormat("MM-dd-yy-HH-mm-ss");
List<File> inputFiles = new ArrayList<File>();
for (File f : listOfFiles) {
if (f.getAbsolutePath().contains("DS_Store"))
continue;
if (!f.isDirectory() && f.exists())
inputFiles.add(f);
}
subProgressMonitor.beginTask("Running CLustering", 50);
subProgressMonitor.subTask("Running Hierarchical Clustering...");
ConsoleView.printlInConsoleln("Running Hierarchical Clustering...");
String clusters = doClustering(inputFiles, fOutputDir, fSaveImg,
new SubProgressMonitor(subProgressMonitor, 45), dateObj);
if (subProgressMonitor.isCanceled()) {
throw new OperationCanceledException();
}
if (clusters == null) {
return null;
}
ConsoleView.printlInConsoleln("Output for Hierarchical Clustering");
ConsoleView.printlInConsoleln("Mapping of document ID to actual names");
subProgressMonitor.subTask("Mapping of document ID to actual names");
for (int i = 0; i < inputFiles.size(); i++) {
ConsoleView.printlInConsoleln((i + 1) + " " + inputFiles.get(i).getName());
}
subProgressMonitor.worked(5);
ConsoleView.printlInConsoleln("Clusters formed: \n");
ConsoleView.printlInConsoleln(clusters);
ConsoleView.printlInConsoleln("Saving the output to hierarchical-cluster-"+df.format(dateObj)+".txt");
subProgressMonitor.subTask("Saving the output to hierarchical-cluster-"+df.format(dateObj)+".txt");
ConsoleView.printlInConsoleln("\nDone Hierarchical Clustering...");
TacitUtility.createRunReport(fOutputDir, "Hierarchical Clustering",dateObj);
subProgressMonitor.worked(5);
subProgressMonitor.done();
return clusters;
}
}