/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.output.metrics; import java.util.*; import org.carrot2.core.Document; import org.carrot2.core.attribute.Processing; import org.carrot2.util.attribute.*; import org.carrot2.shaded.guava.common.base.Function; import org.carrot2.shaded.guava.common.collect.*; /** * A base class for metrics based on some reference partitioning. */ @Bindable public abstract class IdealPartitioningBasedMetric implements IClusteringMetric { /** * Partition id field name. */ @Input @Processing @Attribute public String partitionIdFieldName = Document.PARTITIONS; Set<Object> getPartitions(List<Document> documents) { final HashSet<Object> partitions = Sets.newHashSet(); for (Document document : documents) { final Collection<Object> documentPartitions = document .<Collection<Object>> getField(partitionIdFieldName); if (documentPartitions != null) { partitions.addAll(documentPartitions); } } return partitions; } /** * Returns the number of distinct {@link Document#PARTITIONS}s in a collection of * documents. Note if that at least one of the document has a <code>null</code> * partition, 0 will be returned. */ int getPartitionsCount(List<Document> documents) { return getPartitions(documents).size(); } /** * Returns documents grouped by partitions. */ SetMultimap<Object, Document> getDocumentsByPartition(List<Document> documents) { final SetMultimap<Object, Document> index = HashMultimap.create(); for (Document document : documents) { final Collection<Object> partitions = document.getField(partitionIdFieldName); for (Object partition : partitions) { index.put(partition, document); } } return ImmutableSetMultimap.copyOf(index); } /** * Returns document counts for each partition. */ Map<Object, Integer> getDocumentCountByPartition(List<Document> documents) { return ImmutableMap.copyOf(Maps.transformValues( getDocumentsByPartition(documents).asMap(), new Function<Collection<Document>, Integer>() { public Integer apply(Collection<Document> documents) { return documents.size(); } })); } }