/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.result;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.Func;
import org.apache.metamodel.util.Predicate;
import org.eobjects.analyzer.util.LabelUtils;
/**
* An abstract implementation of {@link ValueCountingAnalyzerResult} which
* implements the most important metric: The value count.
*/
public abstract class AbstractValueCountingAnalyzerResult implements ValueCountingAnalyzerResult {

    private static final long serialVersionUID = 1L;

    /**
     * Exposes the "Value count" metric, which can be parameterized with the
     * name of a value.
     *
     * @return a metric backed by the value counts of this result
     */
    @Metric(value = "Value count", supportsInClause = true)
    public final QueryParameterizableMetric getValueCount() {
        return new QueryParameterizableMetric() {

            /**
             * Suggests the names of the value counts of the enclosing result,
             * leaving out a null name and the labels used for null and
             * unexpected values.
             */
            @Override
            public Collection<String> getParameterSuggestions() {
                final Collection<ValueFrequency> valueCounts = AbstractValueCountingAnalyzerResult.this
                        .getValueCounts();
                final List<String> result = CollectionUtils.map(valueCounts, new Func<ValueFrequency, String>() {
                    @Override
                    public String eval(ValueFrequency vc) {
                        return vc.getName();
                    }
                });
                // List.remove(Object) only removes the first occurrence of
                // each of these entries.
                result.remove(null);
                result.remove(LabelUtils.NULL_LABEL);
                result.remove(LabelUtils.UNEXPECTED_LABEL);
                return result;
            }

            /**
             * Delegates to the total count of the enclosing result.
             */
            @Override
            public int getTotalCount() {
                return AbstractValueCountingAnalyzerResult.this.getTotalCount();
            }

            /**
             * Looks up the count of a single value. A missing (null) count is
             * reported as zero.
             */
            @Override
            public int getInstanceCount(String instance) {
                Integer count = getCount(instance);
                if (count == null) {
                    return 0;
                }
                return count;
            }
        };
    }

    /**
     * Builds a view of the value counts that attempts to contain no more than
     * a preferred number of entries.
     *
     * If the number of value counts is already within the preferred maximum,
     * composite value frequencies are replaced by their children as long as
     * doing so keeps the result within the maximum.
     *
     * Otherwise non-composite value frequencies that share the same count are
     * grouped, and the groups (largest first) are replaced by a single
     * composite entry named <code>&lt;count=N&gt;</code> until the result is
     * small enough or there are no more groups. The result can therefore
     * still be larger than the preferred maximum.
     *
     * @param preferredMaximum
     *            the preferred maximum number of entries in the result
     * @return a sorted collection of (possibly regrouped) value frequencies
     */
    @Override
    public Collection<ValueFrequency> getReducedValueFrequencies(final int preferredMaximum) {
        final Collection<ValueFrequency> original = getValueCounts();
        final Collection<ValueFrequency> result = new TreeSet<ValueFrequency>(original);

        if (original.size() <= preferredMaximum) {
            // check if any composite value freq's can be exploded
            for (ValueFrequency valueFrequency : original) {
                if (valueFrequency.isComposite()) {
                    final List<ValueFrequency> children = valueFrequency.getChildren();
                    // the composite itself disappears (-1) and its children
                    // take its place
                    if (children != null && result.size() - 1 + children.size() <= preferredMaximum) {
                        // replace with children
                        result.remove(valueFrequency);
                        result.addAll(children);
                    }
                }
            }
            return result;
        }

        // Attempt to group/reduce values by frequency.
        final Iterator<List<ValueFrequency>> iterator = groupByFrequencyLargestFirst(original).iterator();
        while (result.size() > preferredMaximum && iterator.hasNext()) {
            final List<ValueFrequency> groupChildren = iterator.next();
            // all members of a group share the same count
            final int groupFrequency = groupChildren.get(0).getCount();
            final String groupName = "<count=" + groupFrequency + ">";
            final ValueFrequency compositeValueFrequency = new CompositeValueFrequency(groupName, groupChildren);
            result.removeAll(groupChildren);
            result.add(compositeValueFrequency);
        }
        return result;
    }

    /**
     * Groups the non-composite value frequencies by their count and returns
     * the groups that have more than one member, largest group first.
     *
     * @param valueFrequencies
     *            the value frequencies to group
     * @return the groups with at least two members, ordered by descending size
     */
    private static List<List<ValueFrequency>> groupByFrequencyLargestFirst(
            final Collection<ValueFrequency> valueFrequencies) {
        // Add all the non-composite value freq's to a map where we can group
        // them by their frequency
        final Map<Integer, List<ValueFrequency>> frequencyMap = new HashMap<Integer, List<ValueFrequency>>();
        for (ValueFrequency valueFrequency : valueFrequencies) {
            if (!valueFrequency.isComposite()) {
                final int count = valueFrequency.getCount();
                List<ValueFrequency> list = frequencyMap.get(count);
                if (list == null) {
                    list = new LinkedList<ValueFrequency>();
                    frequencyMap.put(count, list);
                }
                list.add(valueFrequency);
            }
        }

        // Only groups with more than one member reduce the number of entries.
        final List<List<ValueFrequency>> groups = new ArrayList<List<ValueFrequency>>(CollectionUtils.filter(
                frequencyMap.values(), new Predicate<List<ValueFrequency>>() {
                    @Override
                    public Boolean eval(List<ValueFrequency> list) {
                        return list.size() > 1;
                    }
                }));

        // Among equally sized groups, the group encountered last in the map
        // iteration comes first. Reversing before the (stable) sort keeps that
        // ordering while allowing a comparator that honours the Comparator
        // contract, i.e. returns 0 for groups of equal size.
        Collections.reverse(groups);
        Collections.sort(groups, new Comparator<List<?>>() {
            @Override
            public int compare(List<?> o1, List<?> o2) {
                final int size1 = o1.size();
                final int size2 = o2.size();
                if (size1 == size2) {
                    return 0;
                }
                // larger groups first
                return size1 > size2 ? -1 : 1;
            }
        });
        return groups;
    }

    /**
     * Creates a string consisting of a header with the name of this result
     * followed by at most 4 value count entries.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Value distribution for: ");
        sb.append(getName());
        appendToString(sb, this, 4);
        return sb.toString();
    }

    /**
     * Appends a string representation with a maximum amount of entries. Each
     * entry is written on its own line as <code> - name: count</code>.
     *
     * When the maximum amount of entries has been written, a line with
     * <code>...</code> is appended and the remaining entries are skipped. This
     * line is also appended when the last available entry happened to be the
     * one that reached the maximum.
     *
     * @param sb
     *            the StringBuilder to append to
     * @param groupResult
     *            the result whose value counts are appended
     * @param maxEntries
     *            the maximum amount of entries to append. If 0, nothing is
     *            appended. A negative value is only ever decremented, so in
     *            practice it does not limit the amount of entries.
     */
    protected void appendToString(StringBuilder sb, ValueCountingAnalyzerResult groupResult, int maxEntries) {
        if (maxEntries != 0) {
            Collection<ValueFrequency> valueCounts = groupResult.getValueCounts();
            for (ValueFrequency valueCount : valueCounts) {
                sb.append("\n - ");
                sb.append(valueCount.getName());
                sb.append(": ");
                sb.append(valueCount.getCount());

                maxEntries--;
                if (maxEntries == 0) {
                    sb.append("\n ...");
                    break;
                }
            }
        }
    }
}