/*
 * Copyright 2013
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.frequency.tfidf.util;

import java.io.Serializable;
import java.util.*;
import java.util.Map.Entry;

/**
 * An object that counts objects. Inspired by NLTK's FreqDist class.
 * <p>
 * Counts are kept in a {@link HashMap}, so elements are compared via {@code equals} and
 * {@code hashCode}. This class performs no synchronization of its own.
 *
 * @param <T>
 *            The type of element which is counted.
 */
public class FreqDist<T>
    implements Serializable
{
    private static final long serialVersionUID = 9155968779719980277L;

    /** Per-element counts; an element that was never counted has no entry. */
    private final Map<T, Integer> counts = new HashMap<T, Integer>();

    /** Sum of all increments passed in so far. */
    private int totalCount = 0;

    /**
     * Instantiate a FreqDist and count the given collection. Every occurrence of an element in
     * the collection increments that element's count by one.
     *
     * @param collection
     *            a collection.
     * @throws NullPointerException
     *             if {@code collection} is {@code null}
     */
    public FreqDist(Collection<T> collection)
    {
        super();
        // Use the private helper rather than the public count(...) methods: those can be
        // overridden, and an override would run before the subclass has been initialised.
        for (T element : collection) {
            increment(element, 1);
        }
    }

    /**
     * Creates an empty FreqDist.
     */
    public FreqDist()
    {
        super();
    }

    /**
     * Returns a mapping of elements to probabilities of their occurrence, i.e. each element's
     * count divided by the total count. The returned map is a fresh copy; modifying it does not
     * affect this FreqDist.
     * <p>
     * If the total count is zero (for example after only zero-valued increments), every recorded
     * element is reported with probability {@code 0.0} instead of {@code NaN} or an infinity.
     *
     * @return probabilities of occurrence for every element
     */
    public Map<T, Double> getProbabilities()
    {
        Map<T, Double> probabilities = new HashMap<T, Double>();
        for (Entry<T, Integer> entry : counts.entrySet()) {
            // Guard the division: x / 0 in floating point yields NaN or +/-Infinity.
            double probability = (totalCount == 0)
                    ? 0.0
                    : (double) entry.getValue() / totalCount;
            probabilities.put(entry.getKey(), probability);
        }
        return probabilities;
    }

    /**
     * Returns the count of the given element.
     *
     * @param element
     *            an element.
     * @return the count, or {@code 0} if the element has never been counted
     */
    public int getCount(T element)
    {
        // Only non-null values are ever stored, so null means "no entry".
        Integer current = counts.get(element);
        return (current == null) ? 0 : current;
    }

    /**
     * Increment the count for the given element by one.
     *
     * @param element
     *            the element to be counted
     */
    public void count(T element)
    {
        count(element, 1);
    }

    /**
     * Increment the count by number for the given element. The total count is increased by the
     * same amount.
     *
     * @param element
     *            the element to be counted
     * @param number
     *            an increment.
     */
    public void count(T element, int number)
    {
        increment(element, number);
    }

    /**
     * Increment the counts for all elements contained in the collection. When elements are
     * contained multiple times, they are counted multiple times as well.
     *
     * @param collection
     *            a collection of elements
     * @throws NullPointerException
     *             if {@code collection} is {@code null}
     */
    public void count(Collection<T> collection)
    {
        for (T element : collection) {
            count(element);
        }
    }

    /**
     * Returns the accumulated count of all elements.
     *
     * @return the total count
     */
    public int getTotalCount()
    {
        return totalCount;
    }

    /**
     * Adds {@code number} to both the total and the element's own count, creating the entry if
     * the element has not been seen before.
     */
    private void increment(T element, int number)
    {
        totalCount += number;
        Integer current = counts.get(element);
        counts.put(element, (current == null) ? number : current + number);
    }
}