/* * GeoTools - The Open Source Java GIS Toolkit * http://geotools.org * * (C) 2011-2016, Open Source Geospatial Foundation (OSGeo) * (C) 2008-2011 TOPP - www.openplans.org. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotools.process.vector; import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import org.geotools.data.simple.SimpleFeatureCollection; import org.geotools.factory.CommonFactoryFinder; import org.geotools.feature.visitor.*; import org.geotools.process.ProcessException; import org.geotools.process.factory.DescribeParameter; import org.geotools.process.factory.DescribeProcess; import org.geotools.process.factory.DescribeResult; import org.geotools.util.NullProgressListener; import org.opengis.feature.Feature; import org.opengis.feature.type.AttributeDescriptor; import org.opengis.util.ProgressListener; /** * Computes various attribute statistics over vector data sets. * * @author Andrea Aime * * @source $URL$ */ @DescribeProcess(title = "Aggregate", description = "Computes one or more aggregation functions on a feature attribute. Functions include Count, Average, Max, Median, Min, StdDev, and Sum.") public class AggregateProcess implements VectorProcess { // the functions this process can handle public enum AggregationFunction { Count, Average, Max, Median, Min, StdDev, Sum; } /** * Computes various attribute statistics over vector data sets * @param features FeatureCollection to aggregate * @param aggAttribute target attribute * @param functions * @param singlePass * @param progressListener * @return aggregate Results */ public static Results process(SimpleFeatureCollection features, String aggAttribute, Set<AggregationFunction> functions, Boolean singlePass, ProgressListener progressListener) throws ProcessException, IOException { return process(features, aggAttribute, functions, null, singlePass, progressListener); } public static Results process(SimpleFeatureCollection features, String aggAttribute, Set<AggregationFunction> functions, List<String> groupByAttributes, Boolean singlePass, ProgressListener progressListener) throws ProcessException, IOException { AggregateProcess process = new AggregateProcess(); return process.execute(features, aggAttribute, functions, singlePass, groupByAttributes, progressListener); } public Results execute(SimpleFeatureCollection features,String aggAttribute,Set<AggregationFunction> functions, boolean singlePass, ProgressListener progressListener) throws ProcessException, IOException { return execute(features, aggAttribute, functions, singlePass, null, progressListener); } @DescribeResult(name = "result", description = "Aggregation results (one value for each function computed)") public Results execute( @DescribeParameter(name = "features", description = "Input feature collection") SimpleFeatureCollection features, @DescribeParameter(name = "aggregationAttribute", min = 0, description = "Attribute on which to perform aggregation") String aggAttribute, @DescribeParameter(name = "function", description = "An aggregate function to compute. Functions include Count, Average, Max, Median, Min, StdDev, and Sum.", collectionType = AggregationFunction.class) Set<AggregationFunction> functions, @DescribeParameter(name = "singlePass", description = "If True computes all aggregation values in a single pass (this will defeat DBMS-specific optimizations)", defaultValue = "false") boolean singlePass, @DescribeParameter(name = "groupByAttributes", min = 0, description = "List of group by attributes", collectionType = String.class) List<String> groupByAttributes, ProgressListener progressListener) throws ProcessException, IOException { if (groupByAttributes != null && !groupByAttributes.isEmpty()) { // this request as group by attributes which need special care return handleGroupByVisitor(features, aggAttribute, functions, groupByAttributes, progressListener); } int attIndex = -1; List<AttributeDescriptor> atts = features.getSchema().getAttributeDescriptors(); for (int i = 0; i < atts.size(); i++) { if (atts.get(i).getLocalName().equals(aggAttribute)) { attIndex = i; break; } } if (attIndex == -1) { throw new ProcessException("Could not find attribute " + "[" + aggAttribute + "] " + " the valid values are " + attNames(atts)); } if (functions == null ){ throw new NullPointerException("Aggregate function to call is required"); } List<AggregationFunction> functionList = new ArrayList<AggregationFunction>(functions); List<FeatureCalc> visitors = new ArrayList<FeatureCalc>(); for (AggregationFunction function : functionList) { FeatureCalc calc; if (function == AggregationFunction.Average) { calc = new AverageVisitor(attIndex, features.getSchema()); } else if (function == AggregationFunction.Count) { calc = new CountVisitor(); } else if (function == AggregationFunction.Max) { calc = new MaxVisitor(attIndex, features.getSchema()); } else if (function == AggregationFunction.Median) { calc = new MedianVisitor(attIndex, features.getSchema()); } else if (function == AggregationFunction.Min) { calc = new MinVisitor(attIndex, features.getSchema()); } else if (function == AggregationFunction.StdDev) { calc = new StandardDeviationVisitor(CommonFactoryFinder.getFilterFactory(null).property(aggAttribute)); } else if (function == AggregationFunction.Sum) { calc = new SumVisitor(attIndex, features.getSchema()); } else { throw new ProcessException("Uknown method " + function); } visitors.add(calc); } EnumMap<AggregationFunction, Number> results = new EnumMap<AggregationFunction, Number>(AggregationFunction.class); if (singlePass) { AggregateFeatureCalc calc = new AggregateFeatureCalc(visitors); features.accepts(calc, new NullProgressListener()); List<CalcResult> resultList = (List<CalcResult>) calc.getResult().getValue(); for (int i = 0; i < functionList.size(); i++) { CalcResult result = resultList.get(i); if(result != null) { results.put(functionList.get(i), (Number) result.getValue()); } } } else { for (int i = 0; i < functionList.size(); i++) { final FeatureCalc calc = visitors.get(i); features.accepts(calc, new NullProgressListener()); results.put(functionList.get(i), (Number) calc.getResult().getValue()); } } return new Results(aggAttribute, functions, results); } /** * Helper method that handle requests that have group by attributes by wrapping the functions in group by visitors. */ private Results handleGroupByVisitor(SimpleFeatureCollection features, String aggAttribute, Set<AggregationFunction> functions, List<String> rawGroupByAttributes, ProgressListener progressListener) throws IOException { // building a group by visitor for every aggregate function List<GroupByVisitor> groupByVisitors = functions.stream().map(function -> new GroupByVisitorBuilder() .withAggregateAttribute(aggAttribute, features.getSchema()) .withAggregateVisitor(function.name()) .withGroupByAttributes(rawGroupByAttributes, features.getSchema()) .withProgressListener(progressListener) .build()).collect(Collectors.toList()); // visiting the features collection with each visitor for (GroupByVisitor visitor : groupByVisitors) { features.accepts(visitor, progressListener); } // extracting the results from each group by visitor List<Map<List<Object>, Object>> results = groupByVisitors.stream() .map(visitor -> (Map<List<Object>, Object>)visitor.getResult().toMap()) .collect(Collectors.toList()); return new Results(aggAttribute, functions, rawGroupByAttributes, mergeResults(results, rawGroupByAttributes.size())); } /** * Helper method that merge all group by visitors results in a tabular format. Each line of the table is composed * of the group by attributes values and the aggregation functions results. */ private List<Object[]> mergeResults(List<Map<List<Object>, Object>> results, int groupByAttributesNumber) { List<Object[]> mergedResults = new ArrayList<>(); if(results.isEmpty()) { // no results so nothing to do return mergedResults; } // the size of each line is the number of the group by attributes plus the number of aggregation functions int resultSize = groupByAttributesNumber + results.size(); // the group by attributes values are equal for all the visitors so we use the first visitor result to grab all the group by values for(List<Object> groupByAttributes : results.get(0).keySet()) { // we create the table line that will contains all the results Object[] mergedResult = Arrays.copyOf(groupByAttributes.toArray(), resultSize); // we extract from each group by visitor result the aggregation function result and add it to out table line for(int i = 0; i < results.size(); i++) { mergedResult[groupByAttributesNumber + i] = results.get(i).get(groupByAttributes); } // we add the current line to the table mergedResults.add(mergedResult); } return mergedResults; } private List<String> attNames(List<AttributeDescriptor> atts) { List<String> result = new ArrayList<String>(); for (AttributeDescriptor ad : atts) { result.add(ad.getLocalName()); } return result; } /** * Runs various {@link FeatureCalc} in a single pass * * @author Andrea Aime - GeoSolutions */ static class AggregateFeatureCalc implements FeatureCalc { List<FeatureCalc> delegates; public AggregateFeatureCalc(List<FeatureCalc> delegates) { super(); this.delegates = delegates; } public CalcResult getResult() { final List<CalcResult> results = new ArrayList<CalcResult>(); for (FeatureCalc delegate : delegates) { results.add(delegate.getResult()); } return new AbstractCalcResult() { @Override public Object getValue() { return results; } }; } public void visit(Feature feature) { for (FeatureCalc delegate : delegates) { delegate.visit(feature); } } } /** * The aggregate function results */ public static final class Results { Double min; Double max; Double median; Double average; Double standardDeviation; Double sum; Long count; // this values are used by output formats that want to add more meta information (the JSON tabular output format for example) String aggregateAttribute; Set<AggregationFunction> functions; List<String> groupByAttributes; List<Object[]> groupByResult; EnumMap<AggregationFunction, Number> results; // this constructor is used to output group by results public Results(String aggregateAttribute, Set<AggregationFunction> functions, List<String> groupByAttributes, List<Object[]> groupByResult) { this.aggregateAttribute = aggregateAttribute; this.functions = functions; this.groupByAttributes = groupByAttributes; this.groupByResult = groupByResult; } // this constructor is used to output normal aggregations results public Results(String aggregateAttribute, Set<AggregationFunction> functions, EnumMap<AggregationFunction, Number> results) { this.aggregateAttribute = aggregateAttribute; this.functions = functions; this.results = results; min = toDouble(results.get(AggregationFunction.Min)); max = toDouble(results.get(AggregationFunction.Max)); median = toDouble(results.get(AggregationFunction.Median)); average = toDouble(results.get(AggregationFunction.Average)); standardDeviation = toDouble(results.get(AggregationFunction.StdDev)); sum = toDouble(results.get(AggregationFunction.Sum)); Number nc = results.get(AggregationFunction.Count); if(nc != null) { count = nc.longValue(); } } Double toDouble(Number number) { if(number == null) { return null; } else { return number.doubleValue(); } } public Double getMin() { return min; } public Double getMax() { return max; } public Double getMedian() { return median; } public Double getAverage() { return average; } public Double getStandardDeviation() { return standardDeviation; } public Double getSum() { return sum; } public Long getCount() { return count; } public String getAggregateAttribute() { return aggregateAttribute; } public Set<AggregationFunction> getFunctions() { return functions; } public List<String> getGroupByAttributes() { return groupByAttributes; } public List<Object[]> getGroupByResult() { return groupByResult; } public EnumMap<AggregationFunction, Number> getResults() { return results; } } }