/*
 * The MIT License (MIT)
 * ------------------
 *
 * Copyright (c) 2012-2014 Philipp Nolte
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/*
 * This software was taken from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier
 * and moved to the org.loklak class hierarchy to be enhanced and extended here
 * by @0rb1t3r. The original copyright notice was copied from the README.md
 * from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier/blob/master/README.md
 * The original package domain was de.daslaboratorium.machinelearning.classifier
 */

package org.loklak.tools.bayes;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

/**
 * A concrete implementation of the abstract Classifier class. The Bayes
 * classifier implements a naive Bayes approach to classifying a given set of
 * features: classify(feat1,...,featN) = argmax(P(cat) * PROD(P(featI|cat)))
 *
 * @author Philipp Nolte
 *
 * @see <a href="http://en.wikipedia.org/wiki/Naive_Bayes_classifier">Naive Bayes classifier</a>
 *
 * @param <T> The feature class.
 * @param <K> The category class.
 */
public class BayesClassifier<T, K> extends Classifier<T, K> {

    /**
     * Calculates the product of all feature probabilities:
     * PROD(P(featI|cat)). Each factor is the value returned by
     * <code>featureWeighedAverage(feature, category)</code>; an empty
     * feature collection yields <code>1.0</code>.
     *
     * @param features The set of features to use.
     * @param category The category to test for.
     * @return The product of all feature probabilities.
     */
    private float featuresProbabilityProduct(Collection<T> features, K category) {
        // NOTE(review): the product is accumulated in single precision. If the
        // factors are probabilities below 1, a large number of features may
        // underflow to 0 -- confirm whether log-space accumulation is needed.
        float product = 1.0f;
        for (T feature : features) {
            product *= this.featureWeighedAverage(feature, category);
        }
        return product;
    }

    /**
     * Calculates the probability that the features can be classified as the
     * category given: the category's share of the categories total,
     * multiplied by the product of the per-feature probabilities.
     *
     * @param features The set of features to use.
     * @param category The category to test for.
     * @return The probability that the features can be classified as the
     *         category.
     */
    private float categoryProbability(Collection<T> features, K category) {
        float prior = (float) this.categoryCount(category)
                / (float) this.getCategoriesTotal();
        return prior * featuresProbabilityProduct(features, category);
    }

    /**
     * Computes one {@link Classification} per available category and returns
     * them ordered by ascending probability, so the most probable category is
     * the last element.
     *
     * <p>The entries have to be ordered by probability, not by category, and
     * several categories may share the same probability. A {@link TreeSet}
     * (or any other {@link SortedSet}) would need a comparator that never
     * reports such ties as equal; without a total order on <code>K</code>
     * that comparator cannot be antisymmetric, which makes look-ups in the
     * set unreliable. A list with a stable sort keeps every entry and needs
     * only a plain probability comparison.
     *
     * <p>Entries with equal probability appear in reverse iteration order of
     * <code>getCategories()</code>; consequently the last element among tied
     * maxima is the category that was iterated first.
     *
     * @param features The set of features to use.
     * @return A list of category-probability-entries, sorted ascending by
     *         probability; empty if there are no categories.
     */
    private List<Classification<T, K>> categoryProbabilities(
            Collection<T> features) {

        // Assumes getCategories() yields each category once; no
        // de-duplication is performed here.
        List<Classification<T, K>> ranked =
                new ArrayList<Classification<T, K>>();
        for (K category : this.getCategories()) {
            ranked.add(new Classification<T, K>(
                    features, category,
                    this.categoryProbability(features, category)));
        }

        // Reverse first, then rely on the stability of Collections.sort so
        // that ties end up in reverse iteration order (see Javadoc above).
        Collections.reverse(ranked);
        Collections.sort(ranked, new Comparator<Classification<T, K>>() {
            @Override
            public int compare(Classification<T, K> o1,
                    Classification<T, K> o2) {
                return Float.compare(
                        o1.getProbability(), o2.getProbability());
            }
        });
        return ranked;
    }

    /**
     * Classifies the given set of features.
     *
     * @param features The set of features to classify.
     * @return The classification with the highest probability, or
     *         <code>null</code> if there are no categories to choose from.
     */
    @Override
    public Classification<T, K> classify(Collection<T> features) {
        List<Classification<T, K>> ranked =
                this.categoryProbabilities(features);
        if (ranked.isEmpty()) {
            return null;
        }
        // Ascending order: the best match is the last entry.
        return ranked.get(ranked.size() - 1);
    }

    /**
     * Classifies the given set of features and returns the full details of
     * the classification.
     *
     * @param features The set of features to classify.
     * @return One classification per category, iterated in order of
     *         ascending probability; empty if there are no categories.
     */
    public Collection<Classification<T, K>> classifyDetailed(
            Collection<T> features) {
        return this.categoryProbabilities(features);
    }
}