/* This file is part of the Joshua Machine Translation System.
 *
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.util;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;

/**
 * Maintains element co-occurrence data.
 * <p>
 * Counts of (a, b) pairs are accumulated through
 * {@link #incrementCount(Object, Object)}. Relative frequency
 * estimates derived from those counts are computed on demand
 * and cached; every call to <code>incrementCount</code> discards
 * the cached estimates.
 * <p>
 * This class declares {@link #writeExternal(ObjectOutput)} and
 * {@link #readExternal(ObjectInput)} but does not itself
 * implement <code>java.io.Externalizable</code>; callers invoke
 * those methods explicitly.
 *
 * @author Lane Schwartz
 * @author Chris Callison-Burch
 * @param <A> type of the first element of each counted pair
 * @param <B> type of the second element of each counted pair
 */
public class Counts<A, B> implements Iterable<Pair<A,B>> {

	/**
	 * Stores the number of times instances of A and B co-occur.
	 */
	private Map<A,Map<B,Integer>> counts;

	/** Stores the number of times instances of B occur. */
	private Map<B,Integer> bTotals;

	/** Caches relative frequency estimates for p(A | B). */
	private Map<A,Map<B,Float>> probabilities;

	/** Caches relative frequency estimates for p(B | A). */
	private Map<B,Map<A,Float>> reverseProbabilities;

	/** Stores the value to return when an unseen pair is queried. */
	private float floorProbability;

	/**
	 * Constructs an initially empty co-occurrence counter,
	 * with floor probability set to <code>Float.MIN_VALUE</code>.
	 */
	public Counts() {
		this(Float.MIN_VALUE);
	}

	/**
	 * Constructs an initially empty co-occurrence counter.
	 *
	 * @param floorProbability Floor probability to use when
	 *                         an unseen pair is queried.
	 */
	public Counts(float floorProbability) {
		this.floorProbability = floorProbability;
		this.counts = new HashMap<A,Map<B,Integer>>();
		this.bTotals = new HashMap<B,Integer>();
		this.probabilities = new HashMap<A,Map<B,Float>>();
		this.reverseProbabilities = new HashMap<B,Map<A,Float>>();
	}

	/**
	 * Increments the co-occurrence count of the provided
	 * objects, and the total count of <code>b</code>.
	 * <p>
	 * All cached probability estimates are discarded.
	 *
	 * @param a first element of the pair
	 * @param b second element of the pair
	 */
	public void incrementCount(A a, B b) {

		// Increment the pair count, creating the inner map
		// the first time a is seen.
		Map<B,Integer> bMap = counts.get(a);
		if (bMap == null) {
			bMap = new HashMap<B,Integer>();
			counts.put(a, bMap);
		}
		Integer previousCount = bMap.get(b);
		bMap.put(b, (previousCount == null) ? 1 : previousCount + 1);

		// Increment the total for b.
		Integer previousTotal = bTotals.get(b);
		bTotals.put(b, (previousTotal == null) ? 1 : previousTotal + 1);

		// Invalidate previously calculated probabilities.
		// The increment changes the total for b, which is the
		// denominator of p(a' | b) for every a', and changes the
		// total for a, which is the denominator of p(b' | a) for
		// every b'. Those entries are spread across many inner
		// maps, so both caches are dropped entirely.
		probabilities.clear();
		reverseProbabilities.clear();
	}

	/**
	 * Gets the co-occurrence count for the two elements.
	 *
	 * @param a first element of the pair
	 * @param b second element of the pair
	 * @return the co-occurrence count for the two elements,
	 *         or 0 if the pair has not been seen
	 */
	public int getCount(A a, B b) {
		Map<B,Integer> bMap = counts.get(a);
		if (bMap == null) {
			return 0;
		}
		Integer count = bMap.get(b);
		return (count == null) ? 0 : count;
	}

	/**
	 * Gets the total number of times the specified element has
	 * been seen.
	 *
	 * @param b element to look up
	 * @return the total number of times the specified element
	 *         has been seen, or 0 if it has not been seen
	 */
	int getCount(B b) {
		Integer total = bTotals.get(b);
		return (total == null) ? 0 : total;
	}

	/**
	 * Gets the probability of a given b.
	 * <p>
	 * This value is the relative frequency estimate: the
	 * co-occurrence count of (a, b) divided by the total
	 * count of b. The computed value is cached until the next
	 * call to {@link #incrementCount(Object, Object)}.
	 *
	 * @param a first element of the pair
	 * @param b second element of the pair
	 * @return the probability of a given b, or the floor
	 *         probability if either count is zero
	 */
	public float getProbability(A a, B b) {

		int count = getCount(a, b);
		int bCount = getCount(b);

		if (count == 0 || bCount == 0) {
			return floorProbability;
		}

		Map<B,Float> bMap = probabilities.get(a);
		if (bMap == null) {
			bMap = new HashMap<B,Float>();
			// Register the inner map so the cached value
			// survives this call.
			probabilities.put(a, bMap);
		}

		Float value = bMap.get(b);
		if (value == null) {
			value = (float) count / (float) bCount;
			bMap.put(b, value);
		}

		return value;
	}

	/**
	 * Gets the probability of b given a.
	 * <p>
	 * This value is the relative frequency estimate in the
	 * reverse direction: the co-occurrence count of (a, b)
	 * divided by the sum of the co-occurrence counts of a with
	 * every element. The computed value is cached until the next
	 * call to {@link #incrementCount(Object, Object)}.
	 *
	 * @param b second element of the pair
	 * @param a first element of the pair
	 * @return the probability of b given a, or the floor
	 *         probability if the pair has not been seen
	 */
	public float getReverseProbability(B b, A a) {

		int count = getCount(a, b);
		if (count <= 0) {
			return floorProbability;
		}

		// Consult the cache before paying for the summation.
		Map<A,Float> aMap = reverseProbabilities.get(b);
		Float cached = (aMap == null) ? null : aMap.get(a);
		if (cached != null) {
			return cached;
		}

		// count > 0 guarantees that counts contains a.
		int aCount = 0;
		for (Integer aValue : counts.get(a).values()) {
			aCount += aValue;
		}
		if (aCount <= 0) {
			return floorProbability;
		}

		float value = (float) count / (float) aCount;

		if (aMap == null) {
			aMap = new HashMap<A,Float>();
			reverseProbabilities.put(b, aMap);
		}
		aMap.put(a, value);

		return value;
	}

	/**
	 * Gets the floor probability that is returned whenever an
	 * unseen pair is queried.
	 *
	 * @return The floor probability that is returned whenever
	 *         an unseen pair is queried
	 */
	public float getFloorProbability() {
		return this.floorProbability;
	}

	/**
	 * Writes the state of this object to the given output.
	 * <p>
	 * The fields are written in this order: pair counts,
	 * totals, cached probabilities, cached reverse
	 * probabilities, floor probability. The output is not
	 * closed.
	 *
	 * @param out destination to write to
	 * @throws IOException if writing to <code>out</code> fails
	 */
	public void writeExternal(ObjectOutput out) throws IOException {
		out.writeObject(counts);
		out.writeObject(bTotals);
		out.writeObject(probabilities);
		out.writeObject(reverseProbabilities);
		out.writeFloat(floorProbability);
	}

	/**
	 * Replaces the state of this object with data read from
	 * the given input, in the order written by
	 * {@link #writeExternal(ObjectOutput)}.
	 *
	 * @param in source to read from
	 * @throws ClassNotFoundException if the class of a
	 *         serialized object cannot be found
	 * @throws IOException if reading from <code>in</code> fails
	 */
	@SuppressWarnings("unchecked") // element types cannot be checked at run time
	public void readExternal(ObjectInput in) throws ClassNotFoundException, IOException {
		this.counts = (Map<A,Map<B,Integer>>) in.readObject();
		this.bTotals = (Map<B,Integer>) in.readObject();
		this.probabilities = (Map<A,Map<B,Float>>) in.readObject();
		this.reverseProbabilities = (Map<B,Map<A,Float>>) in.readObject();
		this.floorProbability = in.readFloat();
	}

	/**
	 * Gets an iterator over all counted pairs.
	 * <p>
	 * The pairs are not guaranteed to be iterated over
	 * in any particular order. The returned iterator does
	 * not support <code>remove</code>.
	 *
	 * @return an iterator over all counted pairs
	 */
	public Iterator<Pair<A, B>> iterator() {

		final Iterator<Entry<A,Map<B,Integer>>> aIterator = counts.entrySet().iterator();

		return new Iterator<Pair<A,B>>() {

			Entry<A,Map<B,Integer>> entry = null;
			Iterator<B> bIterator = null;

			/**
			 * Moves to the next outer entry whose inner map
			 * still has elements, skipping empty inner maps.
			 */
			private void advance() {
				while ((bIterator == null || !bIterator.hasNext()) && aIterator.hasNext()) {
					entry = aIterator.next();
					bIterator = entry.getValue().keySet().iterator();
				}
			}

			public boolean hasNext() {
				advance();
				return bIterator != null && bIterator.hasNext();
			}

			public Pair<A, B> next() {
				advance();
				if (bIterator == null || !bIterator.hasNext()) {
					throw new java.util.NoSuchElementException();
				}
				return new Pair<A,B>(entry.getKey(), bIterator.next());
			}

			public void remove() {
				throw new UnsupportedOperationException();
			}

		};
	}

}