/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.result; import java.util.Collection; import java.util.List; import java.util.TreeSet; import org.apache.metamodel.util.CollectionUtils; import org.datacleaner.api.Metric; import org.datacleaner.util.LabelUtils; /** * An abstract implementation of {@link ValueCountingAnalyzerResult} which * implements the most important metric: The value count. */ public abstract class AbstractValueCountingAnalyzerResult implements ValueCountingAnalyzerResult { private static final long serialVersionUID = 1L; @Metric(value = "Value count", supportsInClause = true) public final QueryParameterizableMetric getValueCount() { return new QueryParameterizableMetric() { @Override public Collection<String> getParameterSuggestions() { final Collection<ValueFrequency> valueCounts = AbstractValueCountingAnalyzerResult.this.getValueCounts(); final List<String> result = CollectionUtils.map(valueCounts, ValueFrequency::getName); result.remove(null); result.remove(LabelUtils.NULL_LABEL); result.remove(LabelUtils.UNEXPECTED_LABEL); result.remove(LabelUtils.UNIQUE_LABEL); return result; } @Override public int getTotalCount() { return AbstractValueCountingAnalyzerResult.this.getTotalCount(); } @Override public int getInstanceCount(final String instance) { final Integer count = getCount(instance); if (count == null) { return 0; } return count; } }; } @Override public Collection<ValueFrequency> getReducedValueFrequencies(final int preferredMaximum) { final Collection<ValueFrequency> original = getValueCounts(); final Collection<ValueFrequency> result = new TreeSet<>(original); if (original.size() <= preferredMaximum) { // check if any composite value freq's can be exploded for (final ValueFrequency valueFrequency : original) { if (valueFrequency.isComposite()) { final List<ValueFrequency> children = valueFrequency.getChildren(); if (children != null) { if (result.size() - 1 + children.size() <= preferredMaximum) { // replace with children result.remove(valueFrequency); result.addAll(children); } } } } return result; } else { return original; } } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Value distribution for: "); sb.append(getName()); appendToString(sb, this, 4); return sb.toString(); } /** * Appends a string representation with a maximum amount of entries * * @param sb * the StringBuilder to append to * * @param maxEntries * @return */ protected void appendToString(final StringBuilder sb, final ValueCountingAnalyzerResult groupResult, int maxEntries) { if (maxEntries != 0) { final Collection<ValueFrequency> valueCounts = groupResult.getValueCounts(); for (final ValueFrequency valueCount : valueCounts) { sb.append("\n - "); sb.append(valueCount.getName()); sb.append(": "); sb.append(valueCount.getCount()); maxEntries--; if (maxEntries == 0) { sb.append("\n ..."); break; } } } } }