/* * Encog(tm) Java Examples v3.4 * http://www.heatonresearch.com/encog/ * https://github.com/encog/encog-java-examples * * Copyright 2008-2016 Heaton Research, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more information on Heaton Research copyrights, licenses * and trademarks visit: * http://www.heatonresearch.com/copyright */ package org.encog.examples.ml.bayesian.words; import org.encog.util.Format; public class BayesianSpam { public final static String[] SPAM_DATA = { "offer is secret", "click secret link", "secret sports link" }; public final static String[] HAM_DATA = { "play sports today", "went play sports", "secret sports event", "sports is today", "sports costs money" }; public static void test(BayesianWordAnalyzer a, String message) { double d = a.probability(message); System.out.println("Probability of \"" + message + "\" being " + a.getClassName() + " is " + Format.formatPercent(d) + " ; " + a.getLastProblem()); } public static void testWordClass(BayesianWordAnalyzer a, String word) { double d = a.probabilityWordClass(word); System.out.println(a.getLastProblem() + " = " + Format.formatPercent(d)); } public static void testWordNotClass(BayesianWordAnalyzer a, String word) { double d = a.probabilityWordNotClass(word); System.out.println(a.getLastProblem() + " = " + Format.formatPercent(d)); } public static final void main(String[] args) { BayesianWordAnalyzer a1 = new BayesianWordAnalyzer(0,"spam",SPAM_DATA,"ham",HAM_DATA); System.out.println("Using Laplace of 0"); System.out.println("P(" + a1.getClassName() + "): " + Format.formatPercent(a1.getClassProbability())); System.out.println("P(" + a1.getNotClassName() + "): " + Format.formatPercent(a1.getNotClassProbability())); testWordClass(a1,"secret"); testWordNotClass(a1,"secret"); test(a1,"secret"); // 0.0 test(a1,"today"); // 0.0 test(a1,"sports"); // 16.67 test(a1,"today is secret"); // 0.0 test(a1,"secret is secret"); // 96.15 BayesianWordAnalyzer a2 = new BayesianWordAnalyzer(1,"spam",SPAM_DATA,"ham",HAM_DATA); System.out.println("Using Laplace of 1"); System.out.println("P(" + a2.getClassName() + "): " + Format.formatPercent(a2.getClassProbability())); System.out.println("P(" + a2.getNotClassName() + "): " + Format.formatPercent(a2.getNotClassProbability())); testWordClass(a2,"today"); testWordNotClass(a2,"today"); test(a2,"secret"); // 0.0 test(a2,"today"); test(a2,"sports"); test(a2,"today is secret"); // 48.58 test(a2,"secret is secret"); } }