/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.gui.new_plotter.configuration;
import com.rapidminer.datatable.DataTable;
import com.rapidminer.datatable.DataTableRow;
import com.rapidminer.gui.new_plotter.ChartConfigurationException;
import com.rapidminer.gui.new_plotter.listener.events.ValueGroupingChangeEvent;
import com.rapidminer.gui.new_plotter.utility.NumericalValueRange;
import com.rapidminer.gui.new_plotter.utility.ValueRange;
import java.text.DateFormat;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Vector;
/**
 * This grouping creates a fixed number of bins. The bins are chosen such that each of them contains
 * approximately the same number of examples, which implies that in general the width of the bins is
 * not equal. Identical values are never split across bins, so the number of bins is limited by the
 * number of distinct values in the data.
 *
 * Nominal columns are rejected; numerical and date columns are accepted.
 *
 * @author Marius Helf, Nils Woehler
 */
public class EqualDataFractionGrouping extends AbstractValueGrouping {

    /** The requested number of bins; clamped to the number of distinct values once that is known. */
    private int binCount;

    private final GroupingType type = GroupingType.EQUAL_DATA_FRACTION;

    /**
     * Number of distinct values seen by the last call of
     * {@link #createGroupingModel(DataTable, double, double)}. It is the upper limit for
     * {@link #binCount} and stays at {@link Integer#MAX_VALUE} until a model has been created.
     */
    private int distinctValueCount = Integer.MAX_VALUE;

    /**
     * Creates a new {@link EqualDataFractionGrouping}.
     *
     * @param dataTableColumn
     *            the data table column for which the grouping will be created
     * @param binCount
     *            the number of bins in this grouping
     * @param categorical
     *            indicates if this grouping creates categorical groups
     * @param dateFormat
     *            the format to be used to format dates (if dataTableColumn is a date)
     * @throws ChartConfigurationException
     *             if the data table column is nominal (not supported by this grouping)
     */
    public EqualDataFractionGrouping(DataTableColumn dataTableColumn, int binCount, boolean categorical,
            DateFormat dateFormat) throws ChartConfigurationException {
        super(dataTableColumn, categorical, dateFormat);
        if (dataTableColumn.isNominal()) {
            throw new ChartConfigurationException("grouping.illegal_column_type", getGroupingType().getName(),
                    dataTableColumn.getName(), dataTableColumn.getValueType(), "numerical or date.");
        }
        this.binCount = binCount;
    }

    /**
     * Copy constructor. Copies the column, the categorical flag, the date format and the bin count of
     * the given grouping. The cached distinct value count is not copied; it is recomputed when a
     * grouping model is created.
     *
     * @param other
     *            the grouping to copy
     */
    protected EqualDataFractionGrouping(EqualDataFractionGrouping other) {
        super(other.getDataTableColumn(), other.isCategorical(), other.getDateFormat());
        this.forceDataTableColumn(other.getDataTableColumn());
        this.binCount = other.binCount;
    }

    /**
     * @return the current number of bins
     */
    public int getBinCount() {
        return binCount;
    }

    /**
     * Sets the number of bins. The value is limited to the number of distinct values found by the
     * last model creation. A {@link ValueGroupingChangeEvent} carrying the effective bin count is
     * fired whenever the requested value differs from the current one, even if clamping results in
     * an unchanged bin count, so that listeners learn about the effective value.
     *
     * @param binCount
     *            the requested number of bins
     */
    public void setBinCount(int binCount) {
        if (binCount != this.binCount) {
            // never use more bins than there are distinct values
            this.binCount = Math.min(binCount, distinctValueCount);
            fireGroupingChanged(new ValueGroupingChangeEvent(this, this.binCount));
        }
    }

    /**
     * Creates the value ranges (bins) for the given data table. Only values of the grouping column
     * that are not NaN and lie within {@code [lowerBound, upperBound]} are taken into account. If
     * the current bin count exceeds the number of distinct values, it is reduced via
     * {@link #setBinCount(int)}, which fires a change event.
     *
     * @param dataTable
     *            the data to be grouped
     * @param upperBound
     *            the largest value to be considered (inclusive)
     * @param lowerBound
     *            the smallest value to be considered (inclusive)
     * @return the list of created value ranges in ascending order; empty if no value qualifies
     */
    @Override
    protected List<ValueRange> createGroupingModel(DataTable dataTable, double upperBound, double lowerBound) {
        int columnIdx = DataTableColumn.getColumnIndex(dataTable, getDataTableColumn());

        // count the occurrences of each distinct value within the given bounds
        Map<Double, Integer> distinctValueCountMap = new HashMap<Double, Integer>();
        Vector<Double> sortedDistinctValueList = new Vector<Double>();
        int valueCount = 0;
        for (DataTableRow row : dataTable) {
            double value = row.getValue(columnIdx);
            if (!Double.isNaN(value) && value >= lowerBound && value <= upperBound) {
                Integer currentCount = distinctValueCountMap.get(value);
                if (currentCount == null) {
                    distinctValueCountMap.put(value, 1);
                    sortedDistinctValueList.add(value);
                } else {
                    distinctValueCountMap.put(value, currentCount + 1);
                }
                ++valueCount;
            }
        }
        Collections.sort(sortedDistinctValueList);

        // the number of distinct values is the maximal bin count
        distinctValueCount = distinctValueCountMap.size();

        List<ValueRange> valueGroups = new LinkedList<ValueRange>();
        if (sortedDistinctValueList.isEmpty()) {
            return valueGroups;
        }

        // check if bin count is lower than max bin count
        if (binCount > distinctValueCount) {
            setBinCount(distinctValueCount);
        }

        boolean columnIsDate = dataTable.isDateTime(columnIdx);
        double averageBinSize = valueCount / (double) binCount;

        // index of the distinct value that bounds the current bin from above; it is also the first
        // distinct value of the following bin
        int currentUpperIdx = 0;
        // number of values that have been assigned to already created bins
        int valuesUsed = 0;
        double binLowerBound = sortedDistinctValueList.get(0);

        // start iterating over data
        for (int binIdx = 1; binIdx <= binCount; ++binIdx) {
            // The target is derived from the cumulative target of all bins up to this one, so that
            // deviations of earlier bins are compensated. Each bin aims for at least one value.
            int aimedValueCountForCurrentBin = ((int) Math.round(binIdx * averageBinSize)) - valuesUsed;
            if (aimedValueCountForCurrentBin < 1) {
                aimedValueCountForCurrentBin = 1;
            }

            // number of bins we have to create after the current one
            int remainingBins = binCount - binIdx;

            // number of values in the bin without adding a new distinct value
            int valueCountInBin = distinctValueCountMap.get(sortedDistinctValueList.get(currentUpperIdx));
            int nextValueCount = 0;

            // dvIdx is the next distinct value that is a candidate for being added to the bin
            for (int dvIdx = currentUpperIdx + 1; dvIdx < distinctValueCount; ++dvIdx) {
                nextValueCount = valueCountInBin + distinctValueCountMap.get(sortedDistinctValueList.get(dvIdx));

                // number of remaining distinct values, if we included dvIdx in current bin
                int remainingDistinctValues = distinctValueCount - dvIdx - 1;
                boolean enoughRemainingDistinctValues = remainingDistinctValues >= remainingBins;

                if (nextValueCount >= aimedValueCountForCurrentBin || !enoughRemainingDistinctValues) {
                    double currentDifferenceFromAverage = Math.abs(aimedValueCountForCurrentBin - valueCountInBin);
                    double nextDifferenceFromAverage = Math.abs(aimedValueCountForCurrentBin - nextValueCount);
                    if ((currentDifferenceFromAverage < nextDifferenceFromAverage || !enoughRemainingDistinctValues)
                            && valueCountInBin > 0) {
                        // close the bin before dvIdx: the value at dvIdx becomes the bound of this
                        // bin and is not counted for it
                        currentUpperIdx = dvIdx;
                        nextValueCount = valueCountInBin;
                    } else {
                        // count the value at dvIdx for this bin and use the following distinct
                        // value as bound
                        currentUpperIdx = dvIdx + 1;
                    }
                    break;
                }
                valueCountInBin = nextValueCount;
            }

            // the bound of the last bin may point behind the last distinct value
            if (currentUpperIdx >= distinctValueCount) {
                currentUpperIdx = distinctValueCount - 1;
            }

            double binUpperBound = sortedDistinctValueList.get(currentUpperIdx);
            valuesUsed += nextValueCount;

            // the last boolean argument is true only for the last bin (presumably marking the upper
            // bound as inclusive; see NumericalValueRange)
            NumericalValueRange currentGroup = new NumericalValueRange(binLowerBound, binUpperBound, columnIdx, null,
                    true, binIdx == binCount);
            valueGroups.add(currentGroup);

            binLowerBound = binUpperBound;
        }

        // set precision for representation
        applyAdaptiveVisualRounding(valueGroups, columnIsDate);
        return valueGroups;
    }

    @Override
    public GroupingType getGroupingType() {
        return type;
    }

    /**
     * @return a copy of this grouping created via the copy constructor
     */
    @Override
    public EqualDataFractionGrouping clone() {
        return new EqualDataFractionGrouping(this);
    }

    /**
     * Two {@link EqualDataFractionGrouping}s are considered equal if they agree in their categorical
     * flag and in their bin count.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so no separate null check is needed
        if (!(obj instanceof EqualDataFractionGrouping)) {
            return false;
        }
        EqualDataFractionGrouping other = (EqualDataFractionGrouping) obj;
        return other.isCategorical() == isCategorical() && other.getBinCount() == getBinCount();
    }

    /**
     * Computes the hash code from exactly those properties that are compared in
     * {@link #equals(Object)}, so that equal groupings have equal hash codes.
     */
    @Override
    public int hashCode() {
        int result = isCategorical() ? 1231 : 1237;
        result = 31 * result + getBinCount();
        return result;
    }

    @Override
    public boolean definesUpperLowerBounds() {
        return true;
    }
}