package edu.harvard.wcfia.yoshikoder.reporting;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JScrollPane;
import javax.swing.JTable;

import edu.harvard.wcfia.yoshikoder.document.YKDocument;
import edu.harvard.wcfia.yoshikoder.document.YKDocumentFactory;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.Token;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenizationService;

/**
 * Case-insensitive word frequency table built from a {@link TokenList}.
 * <p>
 * Every word is folded to lower case before it is stored or looked up,
 * so "Lamb" and "lamb" share a single entry. Alongside the per-word
 * counts the map tracks the running total of all increments.
 */
public class WordFrequencyMap {

    /** Lower-cased word (String) mapped to its count (Integer). */
    protected HashMap map;

    /** Sum of all increments applied to this map. */
    protected int total;

    /*
    public WordFrequencyMap(){
        map = new HashMap();
        total = 0;
    }
    */

    /**
     * Builds a frequency map by counting every token in the list once,
     * keyed by the lower-cased token text.
     *
     * @param tl tokens to count
     */
    public WordFrequencyMap(TokenList tl){
        map = new HashMap();
        total = 0;
        for (Iterator iter = tl.iterator(); iter.hasNext();) {
            Token token = (Token) iter.next();
            // creates the entry if necessary
            incrementWordCount(normalize(token.getText()), 1);
        }
    }

    /**
     * Folds a word to the form used as a key in the map. All reads and
     * writes go through this so that they agree on the key.
     */
    private static String normalize(String word){
        return word.toLowerCase();
    }

    /**
     * Increments the count associated with a word. Creates an entry
     * in the map if necessary.
     * <p>
     * The word is lower-cased before it is stored, matching the lookup
     * methods; without this a mixed-case word added here could never be
     * found again by {@link #getWordCount(String)} or
     * {@link #getWordProportion(String)}.
     *
     * @param word word to add a count to
     * @param inc how much to add
     */
    public void incrementWordCount(String word, int inc){
        String key = normalize(word);
        Integer count = (Integer)map.get(key);
        if (count == null) {
            count = new Integer(inc);
        } else {
            count = new Integer(count.intValue() + inc);
        }
        map.put(key, count);
        total += inc;
    }

    /**
     * Looks up the count for a token's text, ignoring case.
     *
     * @param token token whose text is looked up
     * @return the count, or null if the word is not in the map
     */
    public Integer getWordCount(Token token){
        return (Integer)map.get(normalize(token.getText()));
    }

    /**
     * Looks up the count for a word, ignoring case.
     *
     * @param word word to look up
     * @return the count, or null if the word is not in the map
     */
    public Integer getWordCount(String word){
        return (Integer)map.get(normalize(word));
    }

    /**
     * Computes the share of the total count that belongs to a word,
     * ignoring case.
     *
     * @param word word to look up
     * @return count divided by total, or null if the word is not in
     *         the map or the total is zero
     */
    public Double getWordProportion(String word){
        Integer i = (Integer)map.get(normalize(word));
        if (i == null || total == 0) {
            return null;
        }
        return new Double(i.doubleValue() / total);
    }

    /**
     * Provides an iterator over the (String) word types in this map.
     *
     * @return iterator over the map's key set
     */
    public Iterator getVocabularyIterator(){
        return map.keySet().iterator();
    }

    /**
     * Copies the word types in this map into a new list.
     *
     * @return a new list holding the map's keys, in the key set's order
     */
    public List getVocabularyList(){
        List arr = new ArrayList(map.keySet());
        return arr;
    }

    /**
     * Copies the word types in this map into a new list sorted by
     * their natural (String) ordering.
     *
     * @return a new sorted list of the map's keys
     */
    public List getSortedVocabularyList(){
        List arr = getVocabularyList();
        Collections.sort(arr);
        return arr;
    }

    /**
     * @return the sum of all increments applied to this map
     */
    public int getTotal(){
        return total;
    }

    /**
     * @return the number of distinct words in this map
     */
    public int size(){
        return map.size();
    }

    /**
     * @return the total followed by the contents of the underlying map
     */
    public String toString(){
        return "TokenFrequencyMap: total=" + total + "\n" + map.toString();
    }

    /**
     * Manual smoke test: tokenizes a small dummy document, prints the
     * resulting frequency map, shows a DocumentFrequencyReport built
     * from it in a table inside a dialog, then exits.
     */
    public static void main(String[] args) throws Exception {
        YKDocument doc =
            YKDocumentFactory.createDummyDocument("Foo",
                    "Mary and her lamb. Mary's Lamb.", "UTF-8");
        TokenList tl = TokenizationService.getTokenizationService().tokenize(doc);
        WordFrequencyMap map = new WordFrequencyMap(tl);
        System.out.println(map);

        DocumentFrequencyReport rep =
            new DocumentFrequencyReport("title", "desc", "dictionary", doc, map);
        JOptionPane.showMessageDialog((JFrame)null, new JScrollPane(new JTable(rep)));
        System.exit(0);
    }
}