/*
 * Copyright 2011
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.api.frequency.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Maps a condition to a {@link FrequencyDistribution}.
 *
 * <p>This class could be used to learn how frequently a word collocates with another word.
 * Suppose we want to know how frequently the word "of" appears after the word "because", then
 * {@link ConditionalFrequencyDistribution} can be used as follows:</p>
 *
 * <blockquote><pre>
 * ConditionalFrequencyDistribution&lt;String, String&gt; cfd =
 *     new ConditionalFrequencyDistribution&lt;String, String&gt;();
 *
 * cfd.inc("because", "in");
 * cfd.inc("because", "of");
 * cfd.inc("despite", "in");
 * cfd.inc("because", "of");
 *
 * System.out.println(cfd.getCount("because", "of"));
 * </pre></blockquote>
 *
 * <p>The last call to {@link ConditionalFrequencyDistribution#getCount} will yield 2,
 * because given the <code>condition</code> that the first word in a two-word sequence is
 * "because", the word "of" has appeared twice.</p>
 *
 * <p>The total returned by {@link #getN()} is maintained by the mutating methods of this class
 * ({@link #inc}, {@link #incAll}, {@link #addSample}, {@link #setFrequencyDistribution} and
 * {@link #removeCondition}). Changes made directly to a {@link FrequencyDistribution} obtained
 * from {@link #getFrequencyDistribution} or to the set returned by {@link #getConditions()} are
 * not reflected in that total.</p>
 *
 * <p>This class was inspired by NLTK's <a href=
 * "http://nltk.googlecode.com/svn/trunk/doc/api/nltk.probability.ConditionalFreqDist-class.html">
 * ConditionalFreqDist</a>.</p>
 *
 * @param <C>
 *            the type of the conditions
 * @param <V>
 *            the type of the samples
 * @see FrequencyDistribution
 */
public class ConditionalFrequencyDistribution<C, V> {

    /** The per-condition frequency distributions. */
    private final Map<C, FrequencyDistribution<V>> cfd;

    /** The total number of samples of all FrequencyDistributions. */
    private long n;

    /**
     * Creates a new empty {@link ConditionalFrequencyDistribution}.
     */
    public ConditionalFrequencyDistribution() {
        cfd = new HashMap<C, FrequencyDistribution<V>>();
        n = 0;
    }

    /**
     * Creates a new {@link ConditionalFrequencyDistribution} and fills it with samples from a map.
     *
     * @param samples
     *            a {@link Map} from each condition to the samples to record under it
     */
    public ConditionalFrequencyDistribution(Map<C, Iterable<V>> samples) {
        this();
        for (Map.Entry<C, Iterable<V>> entry : samples.entrySet()) {
            incAll(entry.getKey(), entry.getValue());
        }
    }

    /**
     * @return The total number of sample outcomes that have been recorded by this
     *         ConditionalFreqDist.
     */
    public long getN() {
        return n;
    }

    /**
     * Returns the total number of samples which equal a given <code>sample</code> under a given
     * <code>condition</code>.
     *
     * <p>
     * If there are no samples for the condition in question on record, <code>0</code> will be
     * returned.
     *
     * @param condition
     *            the condition
     * @param sample
     *            the sample under a given condition
     * @return the number of all samples which equal <code>sample</code>
     */
    public long getCount(C condition, V sample) {
        // A single lookup also covers a condition that is mapped to null.
        FrequencyDistribution<V> freqDist = cfd.get(condition);
        if (freqDist == null) {
            return 0;
        }
        return freqDist.getCount(sample);
    }

    /**
     * Returns the {@link FrequencyDistribution} under a given <code>condition</code>, or
     * <code>null</code> if this distribution contains no such {@link FrequencyDistribution} for
     * this <code>condition</code>.
     *
     * <p>
     * The returned object is the one stored internally; modifying it directly does not update
     * the total reported by {@link #getN()}.
     *
     * @param condition
     *            the condition
     * @return the distribution the condition maps to
     */
    public FrequencyDistribution<V> getFrequencyDistribution(C condition) {
        return cfd.get(condition);
    }

    /**
     * Directly set the frequency distribution for a given condition, replacing any distribution
     * previously stored for it. The total reported by {@link #getN()} is adjusted by removing the
     * samples of the replaced distribution and adding those of the new one.
     *
     * @param condition
     *            the condition.
     * @param fd
     *            the distribution.
     */
    public void setFrequencyDistribution(C condition, FrequencyDistribution<V> fd) {
        FrequencyDistribution<V> previous = cfd.put(condition, fd);
        n += sampleCount(fd) - sampleCount(previous);
    }

    /**
     * Returns all conditions for which samples have been recorded.
     *
     * <p>
     * The returned set is the key set of the internal map, not a copy. Removing a condition
     * through it bypasses {@link #removeCondition} and therefore does not update
     * {@link #getN()}.
     *
     * @return a {@link Set} of all recorded conditions
     */
    public Set<C> getConditions() {
        return this.cfd.keySet();
    }

    /**
     * Indicates whether samples have been recorded under a given <code>condition</code>.
     *
     * @param condition
     *            the condition in question
     * @return true if samples for <code>condition</code> exist
     */
    public boolean hasCondition(C condition) {
        return this.cfd.containsKey(condition);
    }

    /**
     * Increases a sample under a given <code>condition</code>.
     *
     * @param condition
     *            the condition for this sample
     * @param sample
     *            the sample to add
     */
    public void inc(C condition, V sample) {
        List<V> samples = new ArrayList<V>(1);
        samples.add(sample);
        incAll(condition, samples);
    }

    /**
     * Increases all provided samples under a given <code>condition</code>.
     *
     * <p>
     * If there is no {@link FrequencyDistribution} present for the given <code>condition</code>, a
     * new empty one will be created and populated from the given <code>samples</code>.
     *
     * @param condition
     *            the condition for the samples
     * @param samples
     *            the samples to add
     */
    public void incAll(C condition, Iterable<V> samples) {
        FrequencyDistribution<V> freqDist = getOrCreate(condition);

        // Measure the growth on the distribution itself rather than counting the samples here.
        long countBefore = freqDist.getN();
        freqDist.incAll(samples);
        n += freqDist.getN() - countBefore;
    }

    /**
     * Adds a sample with a certain frequency under a given <code>condition</code>.
     *
     * <p>
     * If there is no {@link FrequencyDistribution} present for the given <code>condition</code>, a
     * new empty one will be created first.
     *
     * @param condition
     *            the condition for this sample
     * @param sample
     *            the sample to add
     * @param frequency
     *            the frequency of the sample
     */
    public void addSample(C condition, V sample, long frequency) {
        FrequencyDistribution<V> freqDist = getOrCreate(condition);

        long countBefore = freqDist.getN();
        freqDist.addSample(sample, frequency);
        n += freqDist.getN() - countBefore;
    }

    /**
     * Remove a previously registered condition together with its samples. The samples of the
     * removed distribution no longer count towards {@link #getN()}. Does nothing if the condition
     * is not registered.
     *
     * @param condition
     *            the condition to be removed
     */
    public void removeCondition(C condition) {
        FrequencyDistribution<V> removed = cfd.remove(condition);
        n -= sampleCount(removed);
    }

    /**
     * Renders every condition followed by its distribution, each on a line of its own terminated
     * by the platform line separator.
     *
     * @return the textual representation of all conditions and their distributions
     */
    @Override
    public String toString() {
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<C, FrequencyDistribution<V>> entry : cfd.entrySet()) {
            // append(Object) renders null as "null" instead of throwing.
            sb.append(entry.getKey());
            sb.append(lineSeparator);
            sb.append(entry.getValue());
            sb.append(lineSeparator);
        }
        return sb.toString();
    }

    /**
     * Returns the distribution stored for <code>condition</code>, registering a new empty one if
     * none is stored yet (or if the condition is mapped to <code>null</code>).
     */
    private FrequencyDistribution<V> getOrCreate(C condition) {
        FrequencyDistribution<V> freqDist = cfd.get(condition);
        if (freqDist == null) {
            freqDist = new FrequencyDistribution<V>();
            cfd.put(condition, freqDist);
        }
        return freqDist;
    }

    /** Returns the number of samples in <code>fd</code>, treating <code>null</code> as empty. */
    private static long sampleCount(FrequencyDistribution<?> fd) {
        return (fd == null) ? 0 : fd.getN();
    }
}