/* * Carrot2 project. * * Copyright (C) 2002-2010, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.examples.research; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Map; import org.carrot2.clustering.lingo.LingoClusteringAlgorithm; import org.carrot2.clustering.stc.STCClusteringAlgorithm; import org.carrot2.core.Controller; import org.carrot2.core.ControllerFactory; import org.carrot2.core.IProcessingComponent; import org.carrot2.output.metrics.ClusteringMetricsCalculator; import org.carrot2.output.metrics.ContaminationMetricDescriptor; import org.carrot2.output.metrics.NormalizedMutualInformationMetricDescriptor; import org.carrot2.output.metrics.PrecisionRecallMetricDescriptor; import org.carrot2.source.ambient.AmbientDocumentSource; import org.carrot2.source.ambient.AmbientDocumentSource.AmbientTopic; import org.carrot2.source.ambient.AmbientDocumentSourceDescriptor; import com.google.common.collect.Lists; import com.google.common.collect.Maps; /** * Runs a clustering quality benchmark based on the data set embedded in * {@link AmbientDocumentSource}. */ public class ClusteringQualityBenchmark { public static void main(String [] args) { // Disable excessive logging final AmbientTopic [] topics = AmbientDocumentSource.AmbientTopic.values(); final Controller controller = ControllerFactory.createSimple(); // List of algorithms to test final ArrayList<Class<? extends IProcessingComponent>> algorithms = Lists .newArrayList(); algorithms.add(LingoClusteringAlgorithm.class); algorithms.add(STCClusteringAlgorithm.class); // List of metrics to output final ArrayList<String> metrics = Lists.newArrayList( ContaminationMetricDescriptor.Keys.WEIGHTED_AVERAGE_CONTAMINATION, PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_F_MEASURE, PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_PRECISION, PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_RECALL, NormalizedMutualInformationMetricDescriptor.Keys.NORMALIZED_MUTUAL_INFORMATION); final Map<String, Object> attributes = Maps.newHashMap(); System.out .println("Topic\tAlgorithm\tContamination\tF-Score\tPrecision\tRecall\tNMI"); for (AmbientTopic topic : topics) { for (Class<? extends IProcessingComponent> algorithm : algorithms) { AmbientDocumentSourceDescriptor .attributeBuilder(attributes).topic(topic); controller.process(attributes, AmbientDocumentSource.class, algorithm, ClusteringMetricsCalculator.class); System.out.print(topic.name() + "\t" + algorithm.getSimpleName()); for (String metricKey : metrics) { System.out.print("\t" + MessageFormat.format("{0,number,#.####}", attributes .get(metricKey))); } System.out.println(); } } } }