/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.examples.research; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Map; import org.carrot2.clustering.lingo.LingoClusteringAlgorithm; import org.carrot2.clustering.stc.STCClusteringAlgorithm; import org.carrot2.core.*; import org.carrot2.output.metrics.ClusteringMetricsCalculator; import org.carrot2.output.metrics.ContaminationMetricDescriptor; import org.carrot2.output.metrics.NormalizedMutualInformationMetricDescriptor; import org.carrot2.output.metrics.PrecisionRecallMetricDescriptor; import org.carrot2.source.ambient.AmbientDocumentSource; import org.carrot2.source.ambient.AmbientDocumentSource.AmbientTopic; import org.carrot2.source.ambient.AmbientDocumentSourceDescriptor; import org.carrot2.text.util.TabularOutput; import org.carrot2.shaded.guava.common.collect.Lists; import org.carrot2.shaded.guava.common.collect.Maps; /** * Runs a clustering quality benchmark based on the data set embedded in * {@link AmbientDocumentSource}. */ public class ClusteringQualityBenchmark { public static void main(String [] args) { // Disable excessive logging final AmbientTopic [] topics = AmbientDocumentSource.AmbientTopic.values(); final Controller controller = ControllerFactory.createSimple(); // List of algorithms to test final ArrayList<Class<? extends IProcessingComponent>> algorithms = Lists .newArrayList(); algorithms.add(LingoClusteringAlgorithm.class); algorithms.add(STCClusteringAlgorithm.class); TabularOutput t = new TabularOutput(new PrintWriter(System.out)); t.columnSeparator(" | "); t.defaultFormat(Double.class).format("%.3f"); t.addColumn("Topic").alignLeft().format("%-18s"); t.addColumn("Algorithm").alignLeft().format("%-15s"); for (AmbientTopic topic : topics) { for (Class<? extends IProcessingComponent> algorithm : algorithms) { final Map<String, Object> attributes = Maps.newHashMap(); AmbientDocumentSourceDescriptor.attributeBuilder(attributes).topic(topic); ProcessingResult result = controller.process( attributes, AmbientDocumentSource.class, algorithm, ClusteringMetricsCalculator.class); t.rowData("Topic", topic.name()); t.rowData("Algorithm", algorithm.getSimpleName()); Map<String, Object> attrs = result.getAttributes(); t.rowData( "Contamination", attrs.get(ContaminationMetricDescriptor.Keys.WEIGHTED_AVERAGE_CONTAMINATION)); t.rowData( "F-Score", attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_F_MEASURE)); t.rowData( "Precision", attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_PRECISION)); t.rowData( "Recall", attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_RECALL)); t.rowData( "NMI", attrs.get(NormalizedMutualInformationMetricDescriptor.Keys.NORMALIZED_MUTUAL_INFORMATION)); t.nextRow(); } } } }