/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.result; import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; import java.util.Map.Entry; import org.apache.metamodel.util.Ref; import org.apache.metamodel.util.SerializableRef; import org.datacleaner.api.AnalyzerResult; import org.datacleaner.api.Distributed; import org.datacleaner.api.Metric; import org.datacleaner.api.ParameterizableMetric; import org.datacleaner.storage.RowAnnotation; import org.datacleaner.storage.RowAnnotationFactory; import org.datacleaner.storage.RowAnnotationImpl; /** * A simple {@link AnalyzerResult} that exposes a set of categories/annotations */ @Distributed(reducer = CategorizationResultReducer.class) public class CategorizationResult implements AnalyzerResult { private static final long serialVersionUID = 1L; private final Ref<RowAnnotationFactory> _annotationFactoryRef; private final Map<String, RowAnnotation> _categories; public CategorizationResult(final RowAnnotationFactory annotationFactory, final Collection<Entry<String, RowAnnotation>> categories) { _annotationFactoryRef = new SerializableRef<>(annotationFactory); _categories = new LinkedHashMap<>(); for (final Entry<String, RowAnnotation> entry : categories) { _categories.put(entry.getKey(), entry.getValue()); } } public CategorizationResult(final RowAnnotationFactory annotationFactory, final Map<String, RowAnnotation> categories) { _annotationFactoryRef = new SerializableRef<>(annotationFactory); _categories = categories; } @Metric("Category count") public ParameterizableMetric getCategoryCount() { return new ParameterizableMetric() { @Override public Number getValue(final String parameter) { return getCategoryCount(parameter); } @Override public Collection<String> getParameterSuggestions() { return getCategoryNames(); } }; } public Collection<String> getCategoryNames() { return _categories.keySet(); } public int getCategoryCount(final String category) { final RowAnnotation annotation = getCategoryRowAnnotation(category); return annotation.getRowCount(); } public AnnotatedRowsResult getCategoryRowSample(final String category) { final RowAnnotationFactory rowAnnotationFactory = _annotationFactoryRef.get(); if (rowAnnotationFactory == null) { return null; } final RowAnnotation annotation = _categories.get(category); if (annotation == null) { return null; } if (!rowAnnotationFactory.hasSampleRows(annotation)) { return null; } return new AnnotatedRowsResult(annotation, rowAnnotationFactory); } public RowAnnotation getCategoryRowAnnotation(final String category) { final RowAnnotation annotation = _categories.get(category); if (annotation == null) { // return an empty annotation return new RowAnnotationImpl(); } return annotation; } }