/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example.table.internal;
import java.util.Arrays;
import com.rapidminer.example.table.internal.AutoColumnUtils.DensityResult;
import com.rapidminer.example.table.internal.IntegerAutoColumn.IntegerAutoChunk;
import com.rapidminer.example.utils.ExampleSetBuilder.DataManagement;
/**
* Dense {@link IntegerAutoChunk} for integer value data inside a {@link IntegerAutoColumn}.
*
* @author Gisa Schaefer
* @since 7.3.1
*/
final class IntegerAutoDenseChunk extends IntegerAutoChunk {

	private static final long serialVersionUID = 1L;

	/**
	 * {@code true} until either the sparsity check has run (see {@link #setLast(int, double)}) or
	 * {@link #complete()} has been called.
	 */
	private boolean undecided = true;

	/**
	 * The size most recently requested via {@link #ensure(int)}. While undecided this can be larger
	 * than {@code data.length}.
	 */
	private int ensuredSize;

	/** Backing storage; missing values are stored as {@link AutoColumnUtils#INTEGER_NAN}. */
	private int[] data = AutoColumnUtils.EMPTY_INTEGER_ARRAY;

	/**
	 * Creates a dense chunk and immediately calls {@link #ensure(int)} with the given size.
	 *
	 * @param id
	 *            index of this chunk inside {@code chunks}; the slot {@code chunks[id]} is overwritten
	 *            if this chunk later switches to a sparse representation
	 * @param chunks
	 *            the chunk array this chunk belongs to
	 * @param size
	 *            the initial size to ensure
	 * @param management
	 *            selects which density threshold is used by the sparsity check
	 */
	IntegerAutoDenseChunk(int id, IntegerAutoChunk[] chunks, int size, DataManagement management) {
		super(id, chunks, management);
		ensure(size);
	}

	/**
	 * Returns the value stored at the given row.
	 *
	 * @param row
	 *            the row index inside this chunk
	 * @return {@link Double#NaN} if the stored integer is {@link AutoColumnUtils#INTEGER_NAN},
	 *         otherwise the stored integer
	 */
	@Override
	double get(int row) {
		int stored = data[row];
		return stored == AutoColumnUtils.INTEGER_NAN ? Double.NaN : stored;
	}

	/**
	 * Stores the value at the given row. {@link Double#NaN} is stored as
	 * {@link AutoColumnUtils#INTEGER_NAN}; every other value is narrowed with an {@code (int)} cast.
	 *
	 * @param row
	 *            the row index inside this chunk
	 * @param value
	 *            the value to store
	 */
	@Override
	void set(int row, double value) {
		data[row] = toStoredValue(value);
	}

	/**
	 * Records the requested size and resizes the backing array. While the dense/sparse decision is
	 * still open, the array is capped at {@link AutoColumnUtils#THRESHOLD_CHECK_FOR_SPARSE} entries;
	 * the full size is allocated later by the sparsity check or by {@link #complete()}.
	 *
	 * @param size
	 *            the size this chunk must be able to hold
	 */
	@Override
	void ensure(int size) {
		ensuredSize = size;
		int newSize = undecided ? Math.min(size, AutoColumnUtils.THRESHOLD_CHECK_FOR_SPARSE) : size;
		// several ensures can resolve to the same effective length; avoid a pointless reallocation
		if (newSize != data.length) {
			data = Arrays.copyOf(data, newSize);
		}
	}

	/**
	 * Stores the value like {@link #set(int, double)} and, if the decision is still open and
	 * {@code row} is the last row covered by the sparsity check, runs that check.
	 *
	 * @param row
	 *            the row index inside this chunk
	 * @param value
	 *            the value to store
	 */
	@Override
	void setLast(int row, double value) {
		set(row, value);
		if (undecided && row == AutoColumnUtils.THRESHOLD_CHECK_FOR_SPARSE - 1) {
			undecided = false;
			checkSparse();
		}
	}

	/**
	 * Finds the most frequent value in the values set until now. If the density reported for the
	 * data is below the threshold selected by the data management, this chunk is replaced in
	 * {@code chunks} by a sparse chunk that uses the most frequent value as its default. Otherwise
	 * the chunk stays dense and is grown to the ensured size.
	 */
	private void checkSparse() {
		DensityResult result = AutoColumnUtils.checkDensity(data);
		double thresholdDensity = management == DataManagement.AUTO ? AutoColumnUtils.THRESHOLD_HIGH_SPARSITY_DENSITY
				: AutoColumnUtils.THRESHOLD_INTEGER_MEDIUM_SPARSITY_DENSITY;

		if (result.density < thresholdDensity) {
			double defaultValue = result.mostFrequentValue;
			IntegerAutoChunk sparse = new IntegerAutoSparseChunk(id, chunks, defaultValue, management);
			sparse.ensure(ensuredSize);
			boolean defaultIsNaN = Double.isNaN(defaultValue);
			for (int i = 0; i < AutoColumnUtils.THRESHOLD_CHECK_FOR_SPARSE; i++) {
				// Read through get() so that a missing value is handed over as NaN rather than as the
				// raw integer sentinel converted to double.
				// NOTE(review): assumes the sparse chunk's set() treats NaN as missing, as set() here
				// does - confirm against IntegerAutoSparseChunk.
				double value = get(i);
				boolean isDefault = defaultIsNaN ? Double.isNaN(value) : value == defaultValue;
				// only set non-default values
				if (!isDefault) {
					sparse.set(i, value);
				}
			}
			chunks[id] = sparse;
		} else {
			// staying dense: undecided is already false here, so this allocates the full ensured size
			ensure(ensuredSize);
		}
	}

	/**
	 * Finishes filling of this chunk: grows the backing array to the ensured size if it is still
	 * shorter and closes the dense/sparse decision so that no sparsity check happens afterwards.
	 */
	@Override
	void complete() {
		if (data.length < ensuredSize) {
			data = Arrays.copyOf(data, ensuredSize);
		}
		undecided = false;
	}

	/**
	 * Converts a double into the integer stored in the backing array. An explicit NaN check is
	 * needed because a plain {@code (int)} cast would turn NaN into {@code 0}.
	 */
	private static int toStoredValue(double value) {
		return Double.isNaN(value) ? AutoColumnUtils.INTEGER_NAN : (int) value;
	}
}