/* GeoGebra - Dynamic Mathematics for Everyone http://www.geogebra.org This file is part of GeoGebra. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation. */ package org.geogebra.common.kernel.statistics; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import org.apache.commons.math3.stat.Frequency; import org.geogebra.common.kernel.Construction; import org.geogebra.common.kernel.Kernel; import org.geogebra.common.kernel.StringTemplate; import org.geogebra.common.kernel.advanced.AlgoUnique; import org.geogebra.common.kernel.algos.AlgoElement; import org.geogebra.common.kernel.commands.Commands; import org.geogebra.common.kernel.geos.GeoBoolean; import org.geogebra.common.kernel.geos.GeoElement; import org.geogebra.common.kernel.geos.GeoList; import org.geogebra.common.kernel.geos.GeoNumeric; import org.geogebra.common.kernel.geos.GeoText; import org.geogebra.common.plugin.GeoClass; public class AlgoFrequency extends AlgoElement { private GeoList dataList; // input private GeoList classList; // input private GeoBoolean isCumulative; // input private GeoBoolean useDensity; // input private GeoNumeric density; // input private GeoList frequency; // output // for compute private GeoList value = new GeoList(cons); private String[] contingencyRowValues, contingencyColumnValues; private Boolean isContingencyTable = false; private double scaleFactor; /** * @param cons * @param isCumulative * @param classList * @param dataList */ public AlgoFrequency(Construction cons, GeoBoolean isCumulative, GeoList classList, GeoList dataList) { this(cons, isCumulative, classList, dataList, null, null, null); } /** * @param cons * @param isCumulative * @param classList * @param dataList * @param scale */ public AlgoFrequency(Construction cons, GeoBoolean isCumulative, GeoList classList, GeoList dataList, GeoNumeric scale) { this(cons, isCumulative, classList, dataList, null, null, scale); } /** * @param cons * @param isCumulative * @param classList * @param dataList * @param useDensity * @param density */ public AlgoFrequency(Construction cons, GeoBoolean isCumulative, GeoList classList, GeoList dataList, GeoBoolean useDensity, GeoNumeric density) { this(cons, isCumulative, classList, dataList, useDensity, density, null); } private GeoNumeric scale; /** * @param cons * @param isCumulative * @param classList * @param dataList * @param useDensity * @param density * @param scale * scale factor */ AlgoFrequency(Construction cons, GeoBoolean isCumulative, GeoList classList, GeoList dataList, GeoBoolean useDensity, GeoNumeric density, GeoNumeric scale) { super(cons); this.classList = classList; this.dataList = dataList; this.isCumulative = isCumulative; this.useDensity = useDensity; this.density = density; this.scale = scale; frequency = new GeoList(cons); setInputOutput(); compute(); } /*************************************************** * Contingency table constructor * * @param cons * @param list1 * @param list2 * @param isContingencyTable * (dummy variable) */ public AlgoFrequency(Construction cons, GeoList list1, GeoList list2, boolean isContingencyTable) { super(cons); this.isContingencyTable = isContingencyTable; this.classList = list1; this.dataList = list2; frequency = new GeoList(cons); setInputOutput(); compute(); } @Override public Commands getClassName() { return Commands.Frequency; } @Override protected void setInputOutput() { ArrayList<GeoElement> tempList = new ArrayList<GeoElement>(); if (isCumulative != null) { tempList.add(isCumulative); } if (classList != null) { tempList.add(classList); } tempList.add(dataList); if (useDensity != null) { tempList.add(useDensity); } if (density != null) { tempList.add(density); } if (scale != null) { tempList.add(scale); } input = new GeoElement[tempList.size()]; input = tempList.toArray(input); setOutputLength(1); setOutput(0, frequency); setDependencies(); // done by AlgoElement } public GeoList getResult() { return frequency; } public GeoList getValue() { return value; } public String[] getContingencyRowValues() { return contingencyRowValues; } public String[] getContingencyColumnValues() { return contingencyColumnValues; } @Override public final void compute() { if (isContingencyTable) { computeContingencyTable(); return; } // Validate input arguments // ======================================================= if (!dataList.isDefined() || dataList.size() == 0) { frequency.setUndefined(); return; } if (!(dataList.getElementType().equals(GeoClass.TEXT) || dataList.getElementType().equals(GeoClass.NUMERIC))) { frequency.setUndefined(); return; } if (classList != null) { if (!classList.getElementType().equals(GeoClass.NUMERIC) || classList.size() < 2) { frequency.setUndefined(); return; } } if (density != null) { if (density.getDouble() <= 0) { frequency.setUndefined(); return; } } if (scale != null) { if (!scale.isDefined()) { frequency.setUndefined(); return; } scaleFactor = scale.getValue(); } frequency.setDefined(true); frequency.clear(); if (value != null) { value.clear(); } double numMax = 0, numMin = 0; boolean doCumulative = isCumulative != null && isCumulative.getBoolean(); // Load the data into f, an instance of Frequency class // ======================================================= Frequency f = new FrequencyGgb(); for (int i = 0; i < dataList.size(); i++) { if (dataList.getElementType().equals(GeoClass.TEXT)) { f.addValue(((GeoText) dataList.get(i)) .toValueString(StringTemplate.defaultTemplate)); } if (dataList.getElementType().equals(GeoClass.NUMERIC)) { f.addValue(((GeoNumeric) dataList.get(i)).getDouble()); } } // If classList does not exist, // get the unique value list and compute frequencies for this list // ======================================================= // handle string data if (dataList.getElementType().equals(GeoClass.TEXT)) { Iterator<Comparable<?>> itr = f.valuesIterator(); String strMax = (String) itr.next(); String strMin = strMax; itr = f.valuesIterator(); while (itr.hasNext()) { String s = (String) itr.next(); if (s.compareTo(strMax) > 0) { strMax = s; } if (s.compareTo(strMin) < 0) { strMin = s; } GeoText text = new GeoText(cons); text.setTextString(s); value.add(text); if (classList == null) { if (doCumulative) { addValue(f.getCumFreq(s)); } else { addValue(f.getCount(s)); } } } } // handle numeric data else { Iterator<Comparable<?>> itr = f.valuesIterator(); numMax = (Double) itr.next(); numMin = numMax; itr = f.valuesIterator(); while (itr.hasNext()) { Double n = (Double) itr.next(); if (n > numMax) { numMax = n.doubleValue(); } if (n < numMin) { numMin = n.doubleValue(); } value.add(new GeoNumeric(cons, n)); if (classList == null) { if (doCumulative) { addValue(f.getCumFreq(n)); } else { addValue(f.getCount(n)); } } } } // If classList exists, compute frequencies using the classList // ======================================================= if (classList != null) { double lowerClassBound = 0; double upperClassBound = 0; double classFreq = 0; // set density conditions boolean hasDensity = false; if (useDensity != null) { hasDensity = useDensity.getBoolean(); } double densityValue = 1; // default density if (density != null) { densityValue = density.getDouble(); } double cumulativeClassFreq = 0; double swap; int length = classList.size(); for (int i = 1; i < length; i++) { lowerClassBound = ((GeoNumeric) classList.get(i - 1)) .getDouble(); upperClassBound = ((GeoNumeric) classList.get(i)).getDouble(); // handle roundoff errror in class list values (this is possible // if auto-generated by another cmd) lowerClassBound = Kernel.checkDecimalFraction(lowerClassBound); upperClassBound = Kernel.checkDecimalFraction(upperClassBound); boolean increasing = true; if (lowerClassBound > upperClassBound) { swap = upperClassBound; upperClassBound = lowerClassBound; lowerClassBound = swap; increasing = false; } classFreq = f.getCumFreq(upperClassBound) - f.getCumFreq(lowerClassBound) + f.getCount(lowerClassBound); if ((i != length - 1 && increasing) || (i != 1 && !increasing)) { classFreq -= f.getCount(upperClassBound); } // System.out.println(" ================================="); // System.out.println("class freq: " + classFreq + " " + // density); if (doCumulative) { cumulativeClassFreq += classFreq; } // adjust the frequency and add to the output GeoList double v = doCumulative ? cumulativeClassFreq : classFreq; if (hasDensity) { v = densityValue * v / (upperClassBound - lowerClassBound); } addValue(v); } // handle the last (highest) class frequency specially // it must also count values equal to the highest class bound } } private void addValue(double v) { if (scale != null) { frequency.add(new GeoNumeric(cons, v * scaleFactor)); } else { frequency.add(new GeoNumeric(cons, v)); } } private void computeContingencyTable() { // Validate input arguments if (!dataList.isDefined() || dataList.size() == 0 || !classList.isDefined() || classList.size() == 0) { frequency.setUndefined(); return; } if (!(dataList.getElementType().equals(GeoClass.TEXT) && classList.getElementType().equals(GeoClass.TEXT))) { frequency.setUndefined(); return; } if (dataList.size() != classList.size()) { frequency.setUndefined(); return; } frequency.setDefined(true); frequency.clear(); contingencyRowValues = getUniqueValues(classList); contingencyColumnValues = getUniqueValues(dataList); List<String> rowList = Arrays.asList(contingencyRowValues); List<String> colList = Arrays.asList(contingencyColumnValues); int n1 = contingencyRowValues.length; int n2 = contingencyColumnValues.length; // todo: reuse freqTable? need to init? int[][] freqTable = new int[n1][n2]; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { freqTable[i][j] = 0; } } // compute the frequencies for (int index = 0; index < classList.size(); index++) { // get ordered pair of strings String s1 = ((GeoText) classList.get(index)) .toValueString(StringTemplate.defaultTemplate); String s2 = ((GeoText) dataList.get(index)) .toValueString(StringTemplate.defaultTemplate); // increment frequency element freqTable[rowList.indexOf(s1)][colList.indexOf(s2)]++; } // create the GeoList matrix for (int row = 0; row < n1; row++) { GeoList l = new GeoList(cons); for (int col = 0; col < n2; col++) { l.add(new GeoNumeric(cons, freqTable[row][col])); } frequency.add(l); } } private String[] getUniqueValues(GeoList list) { AlgoUnique al = new AlgoUnique(cons, list); cons.removeFromConstructionList(al); GeoList geo = (GeoList) al.getGeoElements()[0]; String[] s = new String[geo.size()]; for (int i = 0; i < geo.size(); i++) { String a = geo.get(i).toValueString(StringTemplate.defaultTemplate); s[i] = a; } return s; } }