/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.math.stat.descriptive.AggregateSummaryStatistics;
import org.apache.commons.math.stat.descriptive.StatisticalSummary;
import org.apache.commons.math.stat.descriptive.StatisticalSummaryValues;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.MockInputColumn;
import org.eobjects.analyzer.result.AbstractCrosstabResultReducer;
import org.eobjects.analyzer.result.Crosstab;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Result reducer for {@link NumberAnalyzerResult}s.
*
* Note: Some of the result metrics of {@link NumberAnalyzerResult} are NOT
* reducible. Since the inclusion of these metrics is optional anyway (based
* on a configuration property), we take the optimistic approach and reduce what
* we can.
*
* A warning is logged if a non-reducible metric is encountered.
*/
public class NumberAnalyzerResultReducer extends AbstractCrosstabResultReducer<NumberAnalyzerResult> {

    private static final Logger logger = LoggerFactory.getLogger(NumberAnalyzerResultReducer.class);

    /**
     * Measures whose reduced value is obtained by handing the slave values to
     * {@code sum(...)}.
     */
    private static final Set<String> SUM_MEASURES = new HashSet<String>(Arrays.asList(NumberAnalyzer.MEASURE_SUM,
            NumberAnalyzer.MEASURE_ROW_COUNT, NumberAnalyzer.MEASURE_NULL_COUNT));

    /**
     * Reduces the values that the partial results hold for one
     * column/measure combination into a single value.
     *
     * Sum-like measures, highest value and lowest value are reduced directly
     * from {@code slaveValues}. Mean, standard deviation and variance are
     * derived from an aggregated statistical summary built from
     * {@code results}.
     *
     * @param slaveValues
     *            the values of the measure as found in the partial results
     * @param column
     *            the name of the column that the measure belongs to
     * @param measure
     *            the name of the measure to reduce
     * @param results
     *            the partial results being reduced
     * @param valueClass
     *            the value type; not used by this implementation
     * @return the reduced value, or null if the measure is not one of the
     *         measures handled here (a warning is logged in that case)
     */
    @Override
    protected Serializable reduceValues(List<Object> slaveValues, String column, String measure,
            Collection<? extends NumberAnalyzerResult> results, Class<?> valueClass) {
        if (SUM_MEASURES.contains(measure)) {
            return sum(slaveValues);
        } else if (NumberAnalyzer.MEASURE_HIGHEST_VALUE.equals(measure)) {
            return maximum(slaveValues);
        } else if (NumberAnalyzer.MEASURE_LOWEST_VALUE.equals(measure)) {
            return minimum(slaveValues);
        } else if (NumberAnalyzer.MEASURE_MEAN.equals(measure)) {
            return getSummary(column, results).getMean();
        } else if (NumberAnalyzer.MEASURE_STANDARD_DEVIATION.equals(measure)) {
            return getSummary(column, results).getStandardDeviation();
        } else if (NumberAnalyzer.MEASURE_VARIANCE.equals(measure)) {
            return getSummary(column, results).getVariance();
        }

        logger.warn("Encountered non-reduceable measure '{}'. Slave values are: {}", measure, slaveValues);
        return null;
    }

    /**
     * Builds one aggregated statistical summary for the given column across
     * all partial results.
     *
     * NOTE(review): per the Commons Math documentation
     * {@link AggregateSummaryStatistics#aggregate(Collection)} may return null
     * for an empty collection - confirm that callers never pass an empty
     * {@code results}.
     *
     * @param column
     *            the name of the column to summarize
     * @param results
     *            the partial results to aggregate
     * @return the aggregated summary
     */
    private StatisticalSummary getSummary(String column, Collection<? extends NumberAnalyzerResult> results) {
        final List<SummaryStatistics> statistics = new ArrayList<SummaryStatistics>(results.size());
        for (NumberAnalyzerResult analyzerResult : results) {
            statistics.add(buildStatistics(column, analyzerResult));
        }
        return AggregateSummaryStatistics.aggregate(statistics);
    }

    /**
     * Exposes the metrics of a single partial result for the given column
     * through the {@link SummaryStatistics} API, so that it can be passed to
     * {@link AggregateSummaryStatistics#aggregate(Collection)}.
     *
     * Each getter reads its value from the partial result on every call; no
     * values are added to the returned statistics object itself.
     *
     * @param column
     *            the name of the column whose metrics are exposed
     * @param analyzerResult
     *            the partial result to read the metrics from
     * @return a {@link SummaryStatistics} view on the partial result
     */
    private SummaryStatistics buildStatistics(final String column, final NumberAnalyzerResult analyzerResult) {
        return new SummaryStatistics() {

            private static final long serialVersionUID = 1L;

            // the partial result is queried by column, so a stand-in column
            // carrying the requested name is used for the lookups
            private final InputColumn<Number> col = new MockInputColumn<Number>(column);

            @Override
            public long getN() {
                // the row count is the only integral metric
                return analyzerResult.getRowCount(col).longValue();
            }

            // All getters below return doubles and must use doubleValue():
            // converting via longValue() would drop the fractional part (e.g.
            // a mean of 2.5 would become 2.0) and corrupt the aggregated
            // result.

            @Override
            public double getSum() {
                return analyzerResult.getSum(col).doubleValue();
            }

            @Override
            public double getVariance() {
                return analyzerResult.getVariance(col).doubleValue();
            }

            @Override
            public double getStandardDeviation() {
                return analyzerResult.getStandardDeviation(col).doubleValue();
            }

            @Override
            public double getMean() {
                return analyzerResult.getMean(col).doubleValue();
            }

            @Override
            public double getMin() {
                return analyzerResult.getLowestValue(col).doubleValue();
            }

            @Override
            public double getMax() {
                return analyzerResult.getHighestValue(col).doubleValue();
            }

            @Override
            public double getGeometricMean() {
                return analyzerResult.getGeometricMean(col).doubleValue();
            }

            @Override
            public double getSecondMoment() {
                return analyzerResult.getSecondMoment(col).doubleValue();
            }

            @Override
            public double getSumsq() {
                return analyzerResult.getSumOfSquares(col).doubleValue();
            }
        };
    }

    /**
     * Creates the reduced result from the reduced crosstab, using the columns
     * of the first partial result.
     *
     * @param crosstab
     *            the crosstab holding the reduced values
     * @param results
     *            the partial results; the first one supplies the columns
     * @return the reduced {@link NumberAnalyzerResult}
     */
    @Override
    protected NumberAnalyzerResult buildResult(Crosstab<?> crosstab, Collection<? extends NumberAnalyzerResult> results) {
        final NumberAnalyzerResult firstResult = results.iterator().next();
        final InputColumn<? extends Number>[] columns = firstResult.getColumns();
        return new NumberAnalyzerResult(columns, crosstab);
    }
}