/* * GeoTools - The Open Source Java GIS Toolkit * http://geotools.org * * (C) 2004-2010, Open Source Geospatial Foundation (OSGeo) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotools.filter.function; import java.util.ArrayList; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; import org.geotools.data.simple.SimpleFeatureCollection; import org.geotools.data.simple.SimpleFeatureIterator; import org.geotools.feature.FeatureCollection; import org.geotools.util.logging.Logging; import org.opengis.feature.simple.SimpleFeature; /** * Calculate the Jenks' Natural Breaks classification for a featurecollection * * @author Ian Turton * * @source $URL: http://svn.osgeo.org/geotools/trunk/modules/library/main/src/main/java/org/geotools/filter/function/JenksNaturalBreaksFunction.java $ */ public class JenksNaturalBreaksFunction extends ClassificationFunction { org.opengis.util.ProgressListener progress; private static final Logger logger = Logging.getLogger("org.geotools.filter.function"); /** * */ public JenksNaturalBreaksFunction() { setName("Jenks"); } /* * (non-Javadoc) * * @see org.geotools.filter.function.ClassificationFunction#getArgCount() */ public int getArgCount() { // TODO Auto-generated method stub return 2; } /* * (non-Javadoc) * * @see org.geotools.filter.function.ClassificationFunction#evaluate(java.lang.Object) */ public Object evaluate(Object feature) { if (!(feature instanceof FeatureCollection)) { return null; } return calculate((SimpleFeatureCollection) feature); } /** * This is based on James' GeoTools1 code which seems to be based on * http://lib.stat.cmu.edu/cmlib/src/cluster/fish.f * * @param feature * @return a RangedClassifier */ private Object calculate(SimpleFeatureCollection featureCollection) { SimpleFeatureIterator features = featureCollection.features(); ArrayList<Double> data = new ArrayList<Double>(); try { while (features.hasNext()) { SimpleFeature feature = features.next(); final Object result = getExpression().evaluate(feature); logger.finest("importing " + result); if (result != null) { final Double e = new Double(result.toString()); if (!e.isInfinite() && !e.isNaN()) data.add(e); } } } catch (NumberFormatException e) { return null; // if it isn't a number what should we do? } Collections.sort(data); final int k = getClasses(); final int m = data.size(); if (k == m) { logger.info("Number of classes (" + k + ") is equal to number of data points (" + m + ") " + "unique classification returned"); Comparable[] localMin = new Comparable[k]; Comparable[] localMax = new Comparable[k]; for (int id = 0; id < k - 1; id++) { localMax[id] = data.get(id + 1); localMin[id] = data.get(id); } localMax[k - 1] = data.get(k - 1); localMin[k - 1] = data.get(k - 1); return new RangedClassifier(localMin, localMax); } int[][] iwork = new int[m + 1][k + 1]; double[][] work = new double[m + 1][k + 1]; for (int j = 1; j <= k; j++) { // the first item is always in the first class! iwork[0][j] = 1; iwork[1][j] = 1; // initialize work matirix work[1][j] = 0; for (int i = 2; i <= m; i++) { work[i][j] = Double.MAX_VALUE; } } // calculate the class for each data item for (int i = 1; i <= m; i++) { // sum of data values double s1 = 0; // sum of squares of data values double s2 = 0; double var = 0.0; // consider all the previous values for (int ii = 1; ii <= i; ii++) { // index in to sorted data array int i3 = i - ii + 1; // remember to allow for 0 index double val = data.get(i3 - 1); // update running totals s2 = s2 + (val * val); s1 += val; double s0 = (double) ii; // calculate (square of) the variance // (http://secure.wikimedia.org/wikipedia/en/wiki/Standard_deviation#Rapid_calculation_methods) var = s2 - ((s1 * s1) / s0); // System.out.println(s0+" "+s1+" "+s2); // System.out.println(i+","+ii+" var "+var); int ik = i3 - 1; if (ik != 0) { // not the last value for (int j = 2; j <= k; j++) { // for each class compare current value to var + previous value // System.out.println("\tis "+work[i][j]+" >= "+(var + work[ik][j - 1])); if (work[i][j] >= (var + work[ik][j - 1])) { // if it is greater or equal update classification iwork[i][j] = i3 - 1; // System.out.println("\t\tiwork["+i+"]["+j+"] = "+i3); work[i][j] = var + work[ik][j - 1]; } } } } // store the latest variance! iwork[i][1] = 1; work[i][1] = var; } if (logger.getLevel() == Level.FINER) { for (int i = 0; i < m; i++) { String tmp = (i + ": " + data.get(i)); for (int j = 2; j <= k; j++) { tmp+=("\t" + iwork[i][j]); } logger.finer(tmp); } } // go through matrix and extract class breaks int ik = m - 1; Comparable[] localMin = new Comparable[k]; Comparable[] localMax = new Comparable[k]; localMax[k - 1] = data.get(ik); for (int j = k; j >= 2; j--) { logger.finest("index "+ik + ", class" + j); int id = (int) iwork[ik][j] - 1; // subtract one as we want inclusive breaks on the // left? localMax[j - 2] = data.get(id); localMin[j - 1] = data.get(id); ik = (int) iwork[ik][j] - 1; } localMin[0] = data.get(0); /* * for(int k1=0;k1<k;k1++) { System.out.println(k1+" "+localMin[k1]+" - "+localMax[k1]); } */ return new RangedClassifier(localMin, localMax); } }