package org.activityinfo.server.report.generator.map;
/*
* #%L
* ActivityInfo Server
* %%
* Copyright (C) 2009 - 2013 UNICEF
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
import com.google.common.collect.Lists;
import java.util.Arrays;
import java.util.LinkedList;
/**
* The Jenks optimization method, also called the Jenks natural breaks
* classification method, is a data classification method designed to determine
* the best arrangement of values into different classes. This is done by
* seeking to minimize each class’s average deviation from the class mean, while
* maximizing each class’s deviation from the means of the other groups. In
* other words, the method seeks to reduce the variance within classes and
* maximize the variance between classes.
*/
public class Jenks {

    /** Smallest number of classes considered when the class count is chosen automatically. */
    private static final int MIN_AUTO_CLASSES = 3;

    /** Largest number of classes considered when the class count is chosen automatically. */
    private static final int MAX_AUTO_CLASSES = 6;

    /** Values collected so far, in insertion order. */
    private final LinkedList<Double> data = new LinkedList<Double>();

    /**
     * Adds a single value to the data set to be classified.
     *
     * @param value the value to add
     */
    public void addValue(double value) {
        data.add(value);
    }

    /**
     * Adds several values to the data set to be classified.
     *
     * @param values the values to add
     */
    public void addValues(double... values) {
        for (double value : values) {
            addValue(value);
        }
    }

    /**
     * Computes breaks, choosing the number of classes automatically.
     *
     * <p>With three or fewer distinct values, one class per distinct value is
     * used. Otherwise the search starts at three classes and keeps adding a
     * class (up to six, and never more than the number of distinct values)
     * for as long as the gain in goodness of variance fit does not drop below
     * the gain obtained by the previous step.
     *
     * @return the selected breaks; an empty {@link Breaks} if no values were added
     */
    public Breaks computeBreaks() {
        double[] sorted = toSortedArray();
        int uniqueValues = countUnique(sorted);
        if (uniqueValues <= MIN_AUTO_CLASSES) {
            return computeBreaks(sorted, uniqueValues);
        }

        Breaks lastBreaks = computeBreaks(sorted, MIN_AUTO_CLASSES);
        double lastGvf = lastBreaks.gvf();
        double lastImprovement = lastGvf - computeBreaks(sorted, MIN_AUTO_CLASSES - 1).gvf();

        // lastBreaks already holds the MIN_AUTO_CLASSES solution, so the search
        // continues with the next class count and really evaluates each candidate.
        int maxClasses = Math.min(MAX_AUTO_CLASSES, uniqueValues);
        for (int candidate = MIN_AUTO_CLASSES + 1; candidate <= maxClasses; ++candidate) {
            Breaks breaks = computeBreaks(sorted, candidate);
            double gvf = breaks.gvf();
            double marginalImprovement = gvf - lastGvf;
            if (marginalImprovement < lastImprovement) {
                // The extra class paid off less than the previous one did: stop here.
                return lastBreaks;
            }
            lastBreaks = breaks;
            lastGvf = gvf;
            lastImprovement = marginalImprovement;
        }
        return lastBreaks;
    }

    /**
     * Copies the collected values into a new array sorted in ascending order.
     */
    private double[] toSortedArray() {
        double[] sorted = new double[data.size()];
        int index = 0;
        // Iterate rather than call get(i): positional access on a linked list is linear.
        for (double value : data) {
            sorted[index++] = value;
        }
        Arrays.sort(sorted);
        return sorted;
    }

    /**
     * Counts the distinct values in an array that is already sorted.
     *
     * @param sortedList values in ascending order
     * @return the number of distinct values, 0 for an empty array
     */
    private int countUnique(double[] sortedList) {
        if (sortedList.length == 0) {
            return 0;
        }
        int count = 1;
        for (int i = 1; i < sortedList.length; ++i) {
            if (sortedList[i] != sortedList[i - 1]) {
                count++;
            }
        }
        return count;
    }

    /**
     * Computes breaks for a requested number of classes.
     *
     * @param numclass the requested number of classes; if it exceeds the
     *                 number of distinct values, one class per distinct value
     *                 is produced instead
     * @return the breaks; an empty {@link Breaks} if no values were added
     * @throws IllegalArgumentException if values were added and
     *                                  {@code numclass} is less than 1
     */
    public Breaks computeBreaks(int numclass) {
        return computeBreaks(toSortedArray(), numclass, new Identity());
    }

    /**
     * Computes breaks over an already sorted array without transforming the values.
     */
    private Breaks computeBreaks(double[] list, int numclass) {
        return computeBreaks(list, numclass, new Identity());
    }

    /**
     * Runs the dynamic-programming optimisation that minimises the summed
     * squared deviation of every class from its own mean.
     *
     * @param list      values in ascending order
     * @param numclass  requested number of classes
     * @param transform function applied to each value before the variance is computed
     * @return the breaks, expressed as the inclusive upper index of each class
     */
    private Breaks computeBreaks(double[] list, int numclass, DoubleFunction transform) {
        int numdata = list.length;
        if (numdata == 0) {
            return new Breaks(new double[0], new int[0]);
        }
        if (numclass < 1) {
            throw new IllegalArgumentException("numclass must be at least 1 but was " + numclass);
        }
        // Asking for more classes than there are distinct values lets the
        // back-tracking below walk into the artificial "one value, many classes"
        // cells and produce negative break indices, so cap the class count.
        int classCount = Math.min(numclass, countUnique(list));

        // lowerClassLimits[l][j]: 1-based index of the first value of the last class
        // when the first l values are optimally split into j classes.
        int[][] lowerClassLimits = new int[numdata + 1][classCount + 1];
        // varianceCombinations[l][j]: the cost (summed squared deviation) of that split.
        double[][] varianceCombinations = new double[numdata + 1][classCount + 1];

        for (int j = 1; j <= classCount; j++) {
            lowerClassLimits[1][j] = 1;
            varianceCombinations[1][j] = 0;
            for (int l = 2; l <= numdata; l++) {
                varianceCombinations[l][j] = Double.MAX_VALUE;
            }
        }

        double variance = 0;
        for (int l = 2; l <= numdata; l++) {
            double sum = 0;
            double sumOfSquares = 0;
            double weight = 0;
            // Grow the last class backwards from value l to value 1.
            for (int m = 1; m <= l; m++) {
                int lowerLimit = l - m + 1;
                double val = transform.apply(list[lowerLimit - 1]);
                sumOfSquares += val * val;
                sum += val;
                weight++;
                // Squared deviation of values lowerLimit..l from their mean.
                variance = sumOfSquares - (sum * sum) / weight;
                int previousEnd = lowerLimit - 1;
                if (previousEnd != 0) {
                    for (int j = 2; j <= classCount; j++) {
                        double candidate = variance + varianceCombinations[previousEnd][j - 1];
                        if (varianceCombinations[l][j] >= candidate) {
                            lowerClassLimits[l][j] = lowerLimit;
                            varianceCombinations[l][j] = candidate;
                        }
                    }
                }
            }
            // After the inner loop, variance covers values 1..l as a single class.
            lowerClassLimits[l][1] = 1;
            varianceCombinations[l][1] = variance;
        }

        // Walk back from the last class to recover the upper index of every class.
        int k = numdata;
        int[] upperIndices = new int[classCount];
        upperIndices[classCount - 1] = numdata - 1;
        for (int j = classCount; j >= 2; j--) {
            int lowerLimit = lowerClassLimits[k][j];
            // lowerLimit is 1-based, so the previous class ends at 0-based index lowerLimit - 2.
            upperIndices[j - 2] = lowerLimit - 2;
            k = lowerLimit - 1;
        }
        return new Breaks(list, upperIndices);
    }

    /** A function from double to double, applied to values before classification. */
    private interface DoubleFunction {
        double apply(double x);
    }

    /** Maps a value to its base-10 logarithm. */
    private static class Log10 implements DoubleFunction {
        @Override
        public double apply(double x) {
            return Math.log10(x);
        }
    }

    /** Returns each value unchanged. */
    public static class Identity implements DoubleFunction {
        @Override
        public double apply(double x) {
            return x;
        }
    }

    /**
     * The result of a classification: the sorted values together with the
     * position at which each class ends.
     */
    public static class Breaks {

        private final double[] sortedValues;
        private final int[] breaks;

        /**
         * @param sortedValues the complete array of sorted data values
         * @param breaks       for each class, the index within the sorted array
         *                     of the last value belonging to that class (inclusive)
         */
        private Breaks(double[] sortedValues, int[] breaks) {
            this.sortedValues = sortedValues;
            this.breaks = breaks;
        }

        /**
         * The Goodness of Variance Fit (GVF) is found by taking the difference
         * between the squared deviations from the array mean (SDAM) and the
         * squared deviations from the class means (SDCM), and dividing by the
         * SDAM.
         *
         * @return the GVF; 1.0 when the SDAM is zero (no values, or all values
         *         identical), because there is then no variance left to explain
         */
        public double gvf() {
            double sdam = sumOfSquareDeviations(sortedValues);
            if (sdam == 0.0) {
                // Avoid 0/0, which would yield NaN.
                return 1.0;
            }
            double sdcm = 0.0;
            for (int i = 0; i != numClassses(); ++i) {
                sdcm += sumOfSquareDeviations(classList(i));
            }
            return (sdam - sdcm) / sdam;
        }

        /**
         * Sums the squared differences between each value and the mean of the values.
         */
        private double sumOfSquareDeviations(double[] values) {
            double mean = mean(values);
            double sum = 0.0;
            for (double value : values) {
                double deviation = value - mean;
                sum += deviation * deviation;
            }
            return sum;
        }

        /**
         * @return the sorted values that were classified (the internal array, not a copy)
         */
        public double[] getValues() {
            return sortedValues;
        }

        /**
         * Copies the values that belong to the given class.
         */
        private double[] classList(int i) {
            int classStart = (i == 0) ? 0 : breaks[i - 1] + 1;
            int classEnd = breaks[i];
            return Arrays.copyOfRange(sortedValues, classStart, classEnd + 1);
        }

        /**
         * @param classIndex zero-based index of the class
         * @return the minimum value (inclusive) of the given class
         */
        public double getClassMin(int classIndex) {
            if (classIndex == 0) {
                return sortedValues[0];
            }
            return sortedValues[breaks[classIndex - 1] + 1];
        }

        /**
         * @param classIndex zero-based index of the class
         * @return the maximum value (inclusive) of the given class
         */
        public double getClassMax(int classIndex) {
            return sortedValues[breaks[classIndex]];
        }

        /**
         * @param classIndex zero-based index of the class
         * @return the number of values in the given class
         */
        public int getClassCount(int classIndex) {
            if (classIndex == 0) {
                return breaks[0] + 1;
            }
            return breaks[classIndex] - breaks[classIndex - 1];
        }

        /**
         * Arithmetic mean of the values; NaN for an empty array.
         */
        private double mean(double[] values) {
            double sum = 0;
            for (double value : values) {
                sum += value;
            }
            return sum / values.length;
        }

        /**
         * @return the number of classes
         */
        public int numClassses() {
            return breaks.length;
        }

        /**
         * Renders one line per class: its range (or single value), its size
         * and the values it contains.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i != numClassses(); ++i) {
                if (getClassMin(i) == getClassMax(i)) {
                    sb.append(getClassMin(i));
                } else {
                    sb.append(getClassMin(i)).append(" - ").append(getClassMax(i));
                }
                sb.append(" (").append(getClassCount(i)).append(")");
                sb.append(" = ").append(Arrays.toString(classList(i)));
                sb.append("\n");
            }
            return sb.toString();
        }

        /**
         * Finds the class a value falls into.
         *
         * @param value the value to classify
         * @return the index of the first class whose maximum is not below the
         *         value; the last class if the value exceeds every maximum
         *         (which is -1 when there are no classes)
         */
        public int classOf(double value) {
            for (int i = 0; i != numClassses(); ++i) {
                if (value <= getClassMax(i)) {
                    return i;
                }
            }
            return numClassses() - 1;
        }
    }
}