package edu.harvard.wcfia.yoshikoder.reporting;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import javax.swing.JOptionPane;
import javax.swing.JScrollPane;
import javax.swing.JTable;
import edu.harvard.wcfia.yoshikoder.document.DocumentList;
import edu.harvard.wcfia.yoshikoder.document.DocumentListImpl;
import edu.harvard.wcfia.yoshikoder.document.YKDocument;
import edu.harvard.wcfia.yoshikoder.document.YKDocumentFactory;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.BITokenizerImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.Tokenizer;
/**
 * A report that tabulates word frequencies across a set of documents.
 * Each row is a word from the union of all document vocabularies; the
 * columns are, in order: the word, one raw-count column per document,
 * then one proportion column per document.
 *
 * NOTE(review): assumes {@code maps.length == documentList.size()} and that
 * {@code maps[i]} corresponds to the i-th document in the list — confirm
 * against callers.
 */
public class UnifiedDocumentFrequencyReport extends AbstractReport {

    private static Logger log =
        Logger.getLogger("edu.harvard.wcfia.yoshikoder.reporting.UnifiedDocumentFrequencyReport");

    /** Per-document word frequency maps, parallel to {@code documentList}. */
    protected WordFrequencyMap[] maps;

    /**
     * Builds the report and eagerly computes the full data table.
     *
     * @param reportTitle the report's title
     * @param desc        a human-readable description
     * @param dl          the documents covered by the report
     * @param m           one frequency map per document, in the same order as {@code dl}
     */
    public UnifiedDocumentFrequencyReport(String reportTitle, String desc,
            DocumentList dl, WordFrequencyMap[] m) {
        super(reportTitle, desc, "No dictionary", dl);
        // Fixed: missing space before "word" garbled the log message.
        log.info("got " + dl.size() + " documents and " + m.length + " word frequency maps");
        documentList = dl;
        maps = m;
        data = initData();
    }

    /**
     * Computes the table: one row per word in the merged, sorted vocabulary;
     * column 0 is the word, columns 1..n are counts, columns n+1..2n are
     * proportions (n = number of documents). Words absent from a document
     * get 0.0 in both its count and proportion columns.
     *
     * @return the populated table data
     */
    protected Object[][] initData(){
        int nDocs = documentList.size();
        // Guard the latent mismatch: the original indexed maps[] with the
        // document count; iterate only over indices valid for both.
        int n = Math.min(nDocs, maps.length);

        // Union of every document's vocabulary.
        Set vocab = new HashSet();
        for (int ii = 0; ii < n; ii++) {
            WordFrequencyMap map = maps[ii];
            List v = map.getVocabularyList();
            for (Iterator iter = v.iterator(); iter.hasNext();) {
                String word = (String) iter.next();
                vocab.add(word);
            }
        }
        List vocabulary = new ArrayList(vocab);
        Collections.sort(vocabulary);

        Object[][] report =
            new Object[vocabulary.size()][nDocs * 2 + 1];
        int row = 0;
        for (Iterator iter = vocabulary.iterator(); iter.hasNext();) {
            String word = (String) iter.next();
            report[row][0] = word;
            for (int ii = 0; ii < n; ii++) {
                Integer count = maps[ii].getWordCount(word);
                if (count != null){
                    // Double.valueOf over deprecated new Double(...);
                    // also makes boxing style consistent across branches.
                    report[row][ii + 1] = Double.valueOf(count.doubleValue());
                    report[row][ii + nDocs + 1] =
                        Double.valueOf(maps[ii].getWordProportion(word).doubleValue());
                } else {
                    // Word absent from this document: zero count and proportion.
                    report[row][ii + 1] = Double.valueOf(0);
                    report[row][ii + nDocs + 1] = report[row][ii + 1];
                }
            }
            row++;
        }
        return report;
    }

    /**
     * @return {@code String.class} for the word column, {@code Double.class}
     *         for every count/proportion column
     */
    public Class getColumnClass(int columnIndex) {
        if (columnIndex == 0)
            return String.class;
        else
            return Double.class;
    }

    /**
     * @return "Word" for column 0, "Count (i)" for the count columns,
     *         "Prop. (i)" for the proportion columns, where i is the
     *         1-based document index
     */
    public String getColumnName(int columnIndex) {
        if (columnIndex == 0)
            return "Word";
        else if (columnIndex > documentList.size())
            return "Prop. (" + (columnIndex - documentList.size()) + ")";
        else
            return "Count (" + (columnIndex) + ")";
    }

    /** Ad-hoc visual smoke test: shows the report for two dummy documents. */
    public static void main(String[] args) throws Exception{
        YKDocument d1 = YKDocumentFactory.createDummyDocument("D1", "Mary had a little lamb. Mary had some more", "UTF-8");
        // Fixed: second document was also titled "D1"; clearly meant "D2".
        YKDocument d2 = YKDocumentFactory.createDummyDocument("D2", "Jackie had a little beef. Jackie whined some more", "UTF-8");
        DocumentList dl = new DocumentListImpl();
        dl.add(d1);
        dl.add(d2);
        Tokenizer tok = new BITokenizerImpl();
        WordFrequencyMap wd1 = new WordFrequencyMap(tok.getTokens(d1.getText()));
        WordFrequencyMap wd2 = new WordFrequencyMap(tok.getTokens(d2.getText()));
        UnifiedDocumentFrequencyReport rep = new UnifiedDocumentFrequencyReport("title", "desc",
                dl, new WordFrequencyMap[]{wd1, wd2});
        JTable table = new JTable(rep);
        JOptionPane.showMessageDialog(null, new JScrollPane(table));
    }
}