/** * Title: StanfordMaxEnt<p> * Description: A Maximum Entropy Toolkit<p> * Copyright: Copyright (c) Kristina Toutanova<p> * Company: Stanford University<p> */ package edu.stanford.nlp.tagger.maxent; import edu.stanford.nlp.io.RuntimeIOException; import edu.stanford.nlp.stats.IntCounter; import edu.stanford.nlp.util.Generics; import java.io.IOException; import java.util.Map; import java.io.DataInputStream; import java.io.DataOutputStream; /** * This class was created to store the possible tags of a word along with how many times * the word appeared with each tag. * * @author Kristina Toutanova * @version 1.0 */ class TagCount { private Map<String, Integer> map = Generics.newHashMap(); private int ambClassId = -1; /* This is a numeric ID shared by all words that have the same set of possible tags. */ private String[] getTagsCache; // = null; private int sumCache; private TagCount() { } // used internally TagCount(IntCounter<String> tagCounts) { for (String tag : tagCounts.keySet()) { map.put(tag, tagCounts.getIntCount(tag)); } getTagsCache = map.keySet().toArray(new String[map.keySet().size()]); sumCache = calculateSumCache(); } private static final String NULL_SYMBOL = "<<NULL>>"; /** * Saves the object to the file. * * @param rf is a file handle * Supposedly other objects will be written after this one in the file. The method does not close the file. The TagCount is saved at the current position. */ protected void save(DataOutputStream rf) { try { rf.writeInt(map.size()); for (String tag : map.keySet()) { if (tag == null) { rf.writeUTF(NULL_SYMBOL); } else { rf.writeUTF(tag); } rf.writeInt(map.get(tag)); } } catch (Exception e) { e.printStackTrace(); } } public void setAmbClassId(int ambClassId) { this.ambClassId = ambClassId; } public int getAmbClassId() { return ambClassId; } /** A TagCount object's fields are read from the file. They are read from * the current position and the file is not closed afterwards. */ public static TagCount readTagCount(DataInputStream rf) { try { TagCount tc = new TagCount(); int numTags = rf.readInt(); tc.map = Generics.newHashMap(numTags); for (int i = 0; i < numTags; i++) { String tag = rf.readUTF(); int count = rf.readInt(); if (tag.equals(NULL_SYMBOL)) tag = null; tc.map.put(tag, count); } tc.getTagsCache = tc.map.keySet().toArray(new String[tc.map.keySet().size()]); tc.sumCache = tc.calculateSumCache(); return tc; } catch (IOException e) { throw new RuntimeIOException(e); } } /** * @return the number of total occurrences of the word . */ protected int sum() { return sumCache; } // Returns the number of occurrence of a particular tag. protected int get(String tag) { Integer count = map.get(tag); if (count == null) { return 0; } return count; } private int calculateSumCache() { int s = 0; for (Integer i : map.values()) { s += i; } return s; } /** * @return an array of the tags the word has had. */ public String[] getTags() { return getTagsCache; //map.keySet().toArray(new String[0]); } protected int numTags() { return map.size(); } /** * @return the most frequent tag. */ public String getFirstTag() { String maxTag = null; int max = 0; for (String tag : map.keySet()) { int count = map.get(tag); if (count > max) { maxTag = tag; max = count; } } return maxTag; } @Override public String toString() { return map.toString(); } }