/*
 *    Geotoolkit.org - An Open Source Java GIS Toolkit
 *    http://www.geotoolkit.org
 *
 *    (C) 2012, Geomatys
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 */
package org.geotoolkit.image.classification;

import java.util.LinkedList;
import java.util.List;
import org.apache.sis.util.ArgumentChecks;

/**
 * Splits an array of values into a requested number of contiguous classes, using
 * either the quantile method or the Jenks method.
 *
 * <p><b>Quantile classification</b> is the most basic one: the data are cut into
 * parts whose sizes are as equal as possible.</p>
 *
 * <p><b>Jenks classification</b> is more effective but also more costly to compute.
 * Its aim is to minimize the "intra-class variance", so that the elements of a class
 * look alike as much as possible, which in turn makes the classes as dissimilar from
 * each other as possible. The implementation searches the class limits which minimize
 * the sum, over all classes, of the squared deviations from the class mean.</p>
 *
 * <p>This class does not sort the data: the values are classified in the order in
 * which they are supplied, and each class is a contiguous range of the data table.
 * NOTE(review): the Jenks computation appears to assume values supplied in ascending
 * order; confirm against callers.</p>
 *
 * <p>Code example:</p>
 * <pre>
 * Classification classify = new Classification();
 * classify.setData(data);          // data is a double[]
 * classify.setClassNumber(5);
 * classify.computeQuantile();      // or classify.computeJenks();
 *
 * // results are available in two forms
 * int[] limits = classify.getIndex();
 * List&lt;double[]&gt; classes = classify.getClasses();
 * </pre>
 *
 * @author Rémi Marechal (Geomatys).
 */
public class Classification {

    /**
     * Data which will be classified. The table given to {@link #setData(double[])}
     * is stored as is, without copy.
     */
    private double[] data = null;

    /**
     * Number of classes into which the data are split.
     */
    private int classNumber;

    /**
     * List which contains the classification result, filled by {@link #getClasses()}.
     */
    private final List<double[]> classList;

    /**
     * Number of values in {@link #data}.
     */
    private int dataLength;

    /**
     * Exclusive ending index, in the {@link #data} table, of each class.
     * {@code null} as long as no compute method has been called since the last
     * change of data or class number.
     */
    private int[] index = null;

    /**
     * {@code true} if {@link #classList} must be rebuilt from the {@link #index}
     * table on the next call to {@link #getClasses()}, else {@code false}.
     */
    private boolean reComputeList;

    /**
     * Creates a classification with a class number of 1 and no data.
     */
    public Classification() {
        this.classNumber = 1;
        this.classList   = new LinkedList<double[]>();
    }

    /**
     * Classifies the data with the quantile method.
     *
     * <p>The exclusive ending index of the class {@code i} (counted from 1) is
     * {@code round(i * dataLength / classNumber)}.</p>
     *
     * @throws IllegalArgumentException if no data has been set, or if the class
     *         number is larger than the number of data values.
     */
    public void computeQuantile() {
        if (data == null) {
            throw new IllegalArgumentException("you must set data");
        }
        if (classNumber > dataLength) {
            throw new IllegalArgumentException("impossible to classify datas"
                    + " with class number larger than overall elements number");
        }
        this.index = new int[classNumber];
        this.reComputeList = true;
        if (classNumber == 1) {
            // A single class holds all the data.
            index[0] = dataLength;
            return;
        }
        for (int i = 1; i <= classNumber; i++) {
            index[i - 1] = (int) Math.round(i * ((double) dataLength) / classNumber);
        }
    }

    /**
     * Classifies the data with the Jenks method, after having verified that the
     * data contain enough distinct values.
     * Equivalent to {@code computeJenks(true)}.
     *
     * @throws IllegalArgumentException if no data has been set, if the class number
     *         is larger than the number of data values, or if the data do not contain
     *         at least as many distinct values as requested classes.
     */
    public void computeJenks() {
        computeJenks(true);
    }

    /**
     * Classifies the data with the Jenks method.
     *
     * <p>The computation fills two tables of {@code (dataLength + 1) * (classNumber + 1)}
     * elements and runs three nested loops (ending row, starting row, class), so its
     * cost grows with the square of the number of data values.</p>
     *
     * @param checkData {@code true} to verify, before computing, that the data contain
     *        at least as many distinct values as requested classes.
     * @throws IllegalArgumentException if no data has been set, if the class number
     *         is larger than the number of data values, or if {@code checkData} is
     *         {@code true} and the data do not contain enough distinct values.
     */
    public void computeJenks(boolean checkData) {
        if (data == null) {
            throw new IllegalArgumentException("you must set data");
        }
        if (classNumber > dataLength) {
            throw new IllegalArgumentException("impossible to classify datas"
                    + " with class number larger than overall elements number");
        }
        if (checkData && !checkJenksDataValidity()) {
            throw new IllegalArgumentException("not enough distincts datas for the requested number of classes");
        }
        this.index = new int[classNumber];
        this.reComputeList = true;
        if (classNumber == 1) {
            // A single class holds all the data.
            index[0] = dataLength;
            return;
        }

        /*
         * Both tables are matrices flattened row by row, addressed as
         * [row * nbCol + column], with rows and columns counted from 1
         * (row 0 and column 0 are unused).
         *   - row    : number of leading data values taken into account.
         *   - column : number of classes used to split these values.
         * moyVarTab holds the smallest cost found so far for that cell and
         * indexClassTab the 1-based index of the first value of the last class.
         */
        final int nbCol = classNumber + 1;
        final int tabLength = (dataLength + 1) * nbCol;
        final int[] indexClassTab = new int[tabLength];
        final double[] moyVarTab  = new double[tabLength];

        // First row: a single value has a null cost. Other rows: not yet computed.
        for (int i = 0; i < classNumber; i++) {
            final int firstRowCell = nbCol + i + 1;
            indexClassTab[firstRowCell] = 1;
            moyVarTab[firstRowCell] = 0;
            for (int j = 2; j <= dataLength; j++) {
                moyVarTab[j * nbCol + i + 1] = Double.POSITIVE_INFINITY;
            }
        }

        // idl is the 0-based index of the last value of the current row (row idl + 1).
        for (int idl = 1; idl < dataLength; idl++) {
            double somA = 0; // sum of the values of the candidate last class.
            double somB = 0; // sum of the squared values of the candidate last class.
            double len  = 0; // number of values of the candidate last class.
            double diff = 0; // sum of squared deviations from the mean of that class.

            // Grow the candidate last class backward, from data[idl] down to data[0].
            for (int deb = 1; deb <= idl + 1; deb++) {
                final int currentId = idl - deb + 1;
                final double currentVal = data[currentId];
                somB += currentVal * currentVal;
                somA += currentVal;
                len++;
                diff = somB - (somA * somA) / len;

                // currentId == 0 means the class covers all values: handled below (column 1).
                if (currentId != 0) {
                    for (int j = 1; j < classNumber; j++) {
                        final int cell = (idl + 1) * nbCol + j + 1;
                        // Cost of the last class + best cost of the preceding values in j classes.
                        final double candidate = diff + moyVarTab[currentId * nbCol + j];
                        if (moyVarTab[cell] >= candidate) {
                            indexClassTab[cell] = currentId + 1;
                            moyVarTab[cell] = candidate;
                        }
                    }
                }
            }

            // Column 1: all values of the row in a single class, which begins at the first value.
            final int singleClassCell = (idl + 1) * nbCol + 1;
            indexClassTab[singleClassCell] = 1;
            moyVarTab[singleClassCell] = diff;
        }

        // Walk back from the last cell to find where each class begins.
        int idata = dataLength;
        index[classNumber - 1] = dataLength;
        for (int j = classNumber; j >= 2; j--) {
            // The 0-based beginning of the class j is the exclusive ending of the class j - 1.
            idata = indexClassTab[idata * nbCol + j] - 1;
            index[j - 2] = idata;
        }
    }

    /**
     * Returns the classification result as one table of values per class.
     *
     * <p>The returned list is the internal instance of this object, not a copy:
     * it is cleared and filled again by a call to this method which follows a new
     * computation.</p>
     *
     * @return classification result, one {@code double[]} per class.
     * @throws IllegalStateException if no compute method has been called since the
     *         last change of data or class number.
     */
    public List<double[]> getClasses() {
        if (index == null) {
            throw new IllegalStateException("you must call compute method to fill index table");
        }
        if (!reComputeList) {
            return classList;
        }
        classList.clear();
        int min = 0;
        for (int i = 0; i < classNumber; i++) {
            final int max = index[i];
            final int len = max - min;
            final double[] result = new double[len];
            System.arraycopy(data, min, result, 0, len);
            classList.add(result);
            min = max;
        }
        reComputeList = false;
        return classList;
    }

    /**
     * Returns the classes separation indexes in the {@link #data} table.
     *
     * <p>Each element is the exclusive ending index of a class. For example, if
     * 10 values are split in 3 classes and the result is {@code {4, 7, 10}}, the
     * first class is made of the values at indexes 0 to 3, the second one of the
     * values at indexes 4 to 6 and the third one of the values at indexes 7 to 9.</p>
     *
     * <p>The returned table is the internal one, not a copy.</p>
     *
     * @return classes separation indexes in the {@link #data} table.
     * @throws IllegalStateException if no compute method has been called since the
     *         last change of data or class number.
     */
    public int[] getIndex() {
        if (index == null) {
            throw new IllegalStateException("you must call compute method to fill index table");
        }
        return index;
    }

    /**
     * Sets the data which will be classified and discards any previous result.
     * The given table is stored as is, without copy.
     *
     * @param data values which will be classified.
     * @throws IllegalArgumentException if {@code data} contains fewer values than
     *         the current class number.
     */
    public void setData(double... data) {
        ArgumentChecks.ensureNonNull("data table", data);
        if (data.length < classNumber) {
            // Report the length of the rejected table, not the length of the previous data.
            throw new IllegalArgumentException(
                    "classNumber will not be able to > dataLenght. dataLenght = " + data.length);
        }
        this.data       = data;
        this.dataLength = data.length;
        this.index      = null;
    }

    /**
     * Sets the number of classes and discards any previous result.
     *
     * @param classNumber number of classes requested by the caller, strictly positive.
     */
    public void setClassNumber(int classNumber) {
        ArgumentChecks.ensureStrictlyPositive("classNumber", classNumber);
        this.classNumber = classNumber;
        this.index = null;
    }

    /**
     * Verifies that {@link #data} contain at least {@link #classNumber} distinct
     * values, which is needed to compute Jenks classes.
     *
     * <p>Values are compared with the {@code ==} operator: each {@code NaN} counts
     * as a distinct value, and {@code -0.0} and {@code 0.0} count as the same one.</p>
     *
     * @return {@code true} if Jenks computing is possible else {@code false}.
     */
    private boolean checkJenksDataValidity() {
        int distinctCount = 0;
        for (int i = 0; i < dataLength; i++) {
            final double value = data[i];
            // A value is counted only at its first occurrence.
            boolean alreadySeen = false;
            for (int j = 0; j < i; j++) {
                if (data[j] == value) {
                    alreadySeen = true;
                    break;
                }
            }
            // Stop as soon as enough distinct values have been found.
            if (!alreadySeen && ++distinctCount >= classNumber) {
                return true;
            }
        }
        return false;
    }
}