/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.examples.clustering; import java.util.HashMap; import java.util.List; import java.util.Map; import org.carrot2.clustering.lingo.LingoClusteringAlgorithm; import org.carrot2.clustering.lingo.LingoClusteringAlgorithmDescriptor; import org.carrot2.core.Cluster; import org.carrot2.core.Controller; import org.carrot2.core.ControllerFactory; import org.carrot2.core.Document; import org.carrot2.core.ProcessingResult; import org.carrot2.core.attribute.CommonAttributesDescriptor; import org.carrot2.examples.ConsoleFormatter; import org.carrot2.examples.SampleDocumentData; import org.carrot2.matrix.factorization.IterationNumberGuesser.FactorizationQuality; import org.carrot2.source.microsoft.v5.Bing5DocumentSource; import org.carrot2.source.microsoft.v5.Bing5DocumentSourceDescriptor; /** * This example shows how to customize the behaviour of clustering algorithms and * document sources by setting attributes. For a complete summary of the available * attributes, please see Carrot2 manual. */ public class UsingAttributes { @SuppressWarnings("unused") public static void main(String [] args) { /* [[[start:using-attributes-raw-map-intro]]] * <div> * <p> * You can change the default behaviour of clustering algorithms and document sources * by changing their <em>attributes</em>. For a complete list of available attributes, * their identifiers, types and allowed values, please see Carrot2 manual. * </p> * <p> * To pass attributes to Carrot2, put them into a {@link java.util.Map}, * along with query or documents being clustered. The code shown below searches the * web using {@link org.carrot2.source.microsoft.v5.Bing5DocumentSource} * and clusters the results using {@link org.carrot2.clustering.lingo.LingoClusteringAlgorithm} * customized to create fewer clusters than by default. * </p> * </div> * [[[end:using-attributes-raw-map-intro]]] */ { // [[[start:using-attributes-raw-map]]] /* A controller to manage the processing pipeline. */ final Controller controller = ControllerFactory.createSimple(); /* Prepare attribute map */ final Map<String, Object> attributes = new HashMap<String, Object>(); /* Put attribute values using direct keys. */ attributes.put(CommonAttributesDescriptor.Keys.QUERY, "data mining"); attributes.put(CommonAttributesDescriptor.Keys.RESULTS, 100); attributes.put("LingoClusteringAlgorithm.desiredClusterCountBase", 15); /* Put your own API key here! */ attributes.put(Bing5DocumentSourceDescriptor.Keys.API_KEY, BingKeyAccess.getKey()); /* Perform processing */ final ProcessingResult result = controller.process(attributes, Bing5DocumentSource.class, LingoClusteringAlgorithm.class); /* Documents fetched from the document source, clusters created by Carrot2. */ final List<Document> documents = result.getDocuments(); final List<Cluster> clusters = result.getClusters(); // [[[end:using-attributes-raw-map]]] ConsoleFormatter.displayResults(result); } /* [[[start:using-attributes-builders-intro]]] * * <div> * <p> * As an alternative to the raw attribute map used in the previous example, you * can use attribute map builders. Attribute map builders have a number of advantages: * </p> * * <ul> * <li>Type-safety: the correct type of the value will be enforced at compile time</li> * <li>Error prevention: unexpected results caused by typos in attribute name strings are avoided</li> * <li>Early error detection: in case an attribute's key changes, your compiler will detect that</li> * <li>IDE support: your IDE will suggest the right method names and parameters</li> * </ul> * * <p> * A possible disadvantage of attribute builders is that one algorithm's attributes can * be divided into a number of builders and hence not readily available in your IDE's auto * complete window. Please consult attribute documentation in Carrot2 manual for pointers to * the appropriate builder classes and methods. * </p> * * <p> * The code shown below fetches 100 results for query <em>data mining</em> from * {@link org.carrot2.source.microsoft.v5.Bing5DocumentSource} and clusters them using * the {@link org.carrot2.clustering.lingo.LingoClusteringAlgorithm} tuned to create slightly * fewer clusters than by default. Please note how the API key is passed and use your own * key in production deployments. * </p> * </div> * * [[[end:using-attributes-builders-intro]]] */ { /// [[[start:using-attributes-builders]]] /* A controller to manage the processing pipeline. */ final Controller controller = ControllerFactory.createSimple(); /* Prepare attribute map */ final Map<String, Object> attributes = new HashMap<String, Object>(); /* Put values using attribute builders */ CommonAttributesDescriptor .attributeBuilder(attributes) .query("data mining") .results(100); LingoClusteringAlgorithmDescriptor .attributeBuilder(attributes) .desiredClusterCountBase(15) .matrixReducer() .factorizationQuality(FactorizationQuality.HIGH); Bing5DocumentSourceDescriptor .attributeBuilder(attributes) .apiKey(BingKeyAccess.getKey()); // use your own key here /* Perform processing */ final ProcessingResult result = controller.process(attributes, Bing5DocumentSource.class, LingoClusteringAlgorithm.class); /* Documents fetched from the document source, clusters created by Carrot2. */ final List<Document> documents = result.getDocuments(); final List<Cluster> clusters = result.getClusters(); /// [[[end:using-attributes-builders]]] ConsoleFormatter.displayResults(result); } /* [[[start:using-attributes-output-intro]]] * <div> * <p> * Some algorithms apart from clusters can produce additional, usually * diagnostic, output. The output is present in the attributes map contained * in the {@link org.carrot2.core.ProcessingResult}. You can read the contents * of that map directly or through the attribute map builders. Carrot2 manual * lists and describes in detail the output attributes of each component. * </p> * <p> * The code shown below clusters clusters an example collection of * {@link org.carrot2.core.Document}s using the Lingo algorithm. Lingo can * optionally use native platform-specific matrix computation libraries. The * example code reads an attribute to find out whether such libraries were * successfully loaded and used. * </p> * </div> * [[[end:using-attributes-output-intro]]] */ { /// [[[start:using-attributes-output]]] /* A controller to manage the processing pipeline. */ final Controller controller = ControllerFactory.createSimple(); /* Prepare attribute map */ final Map<String, Object> attributes = new HashMap<String, Object>(); CommonAttributesDescriptor .attributeBuilder(attributes) .documents(SampleDocumentData.DOCUMENTS_DATA_MINING); LingoClusteringAlgorithmDescriptor .attributeBuilder(attributes) .desiredClusterCountBase(15) .matrixReducer() .factorizationQuality(FactorizationQuality.HIGH); /* Perform processing */ final ProcessingResult result = controller.process(attributes, LingoClusteringAlgorithm.class); /* Clusters created by Carrot2, read processing time */ final List<Cluster> clusters = result.getClusters(); final Long clusteringTime = CommonAttributesDescriptor.attributeBuilder( result.getAttributes()).processingTimeAlgorithm(); /// [[[end:using-attributes-output]]] ConsoleFormatter.displayResults(result); } } }