/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example.table.internal;
import java.io.Serializable;
import java.util.Arrays;
import com.rapidminer.example.Tools;
/**
* Super class for sparse chunks with high sparsity (i.e., few non-default values). Stores only
* values different from the default value. {@link #set(int, double)} returns {@code true} if the
* chunk is filled more than {@link AutoColumnUtils#THRESHOLD_HIGH_SPARSITY_MAXIMAL_DENSITY}. Should only
* be extended by two classes in order to be fast.
*
* @author Jan Czogalla
* @since 7.3.1
*/
abstract class AbstractHighSparsityChunk implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Length of the first non-empty {@link #indices} array. {@link #checkedShrink()} never
     * produces an array shorter than this.
     */
    private static final int MIN_NON_EMPTY_SIZE = 8;

    /**
     * The rows that hold a non-default value. The first {@link #valueCount} entries are kept in
     * ascending order so that they can be binary searched; entries beyond that are never searched.
     */
    private int[] indices = AutoColumnUtils.EMPTY_INTEGER_ARRAY;

    /** The number of rows currently stored, i.e. the length of the used prefix of the indices. */
    protected int valueCount;

    /** The value reported for every row that is not stored explicitly. */
    private final double defaultValue;

    /** The total size set via {@link #ensure(int)}; it is {@code 0} until then. */
    private int ensuredCount;

    /**
     * Creates an empty chunk in which every row has the given default value.
     *
     * @param defaultValue
     *            the value that is not stored explicitly
     */
    AbstractHighSparsityChunk(double defaultValue) {
        this.defaultValue = defaultValue;
    }

    /**
     * Returns the value stored for this row.
     *
     * @param row
     *            the row for which to obtain the stored value
     * @return the value stored for row, or the default value if nothing is stored for it
     */
    public final double get(int row) {
        int index = getIndex(row);
        return index < 0 ? defaultValue : getValue(index);
    }

    /**
     * Sets the value for the given row. Returns {@code true} if after this set the sparse chunk is
     * too full, i.e. its density is bigger than
     * {@link AutoColumnUtils#THRESHOLD_HIGH_SPARSITY_MAXIMAL_DENSITY}. The density is only checked
     * when a new row is inserted; overwriting or removing an existing entry always returns
     * {@code false}.
     * <p>
     * Note that the density check only works if the total size was {@link #ensure}d before. Without
     * that the ensured size is {@code 0} and the computed density is infinite, so presumably every
     * insertion reports the chunk as too full.
     *
     * @param row
     *            the row for which to set the value
     * @param value
     *            the value to store
     * @return {@code true} if the maximal density is reached
     */
    public final boolean set(int row, double value) {
        int index = getIndex(row);
        if (Tools.isDefault(defaultValue, value)) {
            // default values are never stored: drop the entry if the row had one
            if (index >= 0) {
                removeIndex(index);
            }
            return false;
        }
        if (index >= 0) {
            // row is already stored at this index, only its value changes
            setValue(index, value);
            return false;
        }
        // decode the insertion point, see Arrays.binarySearch
        index = -index - 1;
        insertIndex(index);
        // check density; valueCount already includes the new entry
        boolean tooFull = valueCount
                / (double) ensuredCount > AutoColumnUtils.THRESHOLD_HIGH_SPARSITY_MAXIMAL_DENSITY;
        // set index
        indices[index] = row;
        // set value in base column
        setValue(index, value);
        return tooFull;
    }

    /**
     * The index returned by binary search for the row in {@link #indices}. Returns a non-negative
     * number if and only if the row was found. If the row was not found the returned negative
     * index encodes where to insert this new row as {@code -(insertion point) - 1} (see
     * {@link Arrays#binarySearch}).
     *
     * @param row
     *            the row to search for
     * @return the index where row is found or a negative index
     */
    private int getIndex(int row) {
        // if new row is bigger than the biggest or no row inserted yet,
        // no binary search is necessary: the insertion point is the end of the used prefix
        if (valueCount == 0 || row > indices[valueCount - 1]) {
            return -valueCount - 1;
        }
        return Arrays.binarySearch(indices, 0, valueCount, row);
    }

    /**
     * Sets the total size. The density check in {@link #set(int, double)} relates the number of
     * stored values to this size.
     *
     * @param size
     *            the expected size
     */
    public final void ensure(int size) {
        ensuredCount = size;
    }

    /**
     * Grows and shifts the indices and value arrays so that there is a new place at index. The
     * caller is responsible for filling the new place.
     *
     * @param index
     *            the position at which to make room
     */
    private void insertIndex(int index) {
        int[] tmp = checkedGrow();
        AutoColumnUtils.copy(indices, tmp, index, index, index + 1, valueCount);
        indices = tmp;
        // keep the values array aligned with the (possibly grown) indices array
        insertValueIndex(index, indices.length);
        valueCount++;
    }

    /**
     * Removes the given index from the indices and value arrays.
     *
     * @param index
     *            the position to remove
     */
    private void removeIndex(int index) {
        int[] tmp = checkedShrink();
        AutoColumnUtils.copy(indices, tmp, index, index + 1, index, tmp.length);
        indices = tmp;
        // keep the values array aligned with the (possibly shrunk) indices array
        removeValueIndex(index, indices.length);
        // overwrite duplicate last row with MAX_VALUE
        indices[indices.length - 1] = Integer.MAX_VALUE;
        valueCount--;
    }

    /**
     * Enlarges the {@link #indices} array if necessary.
     *
     * @return the current array if it still has a free slot, otherwise a new, empty array that is
     *         {@link #MIN_NON_EMPTY_SIZE} long (if the current one is empty) or 1.5 times as long
     */
    private int[] checkedGrow() {
        int length = indices.length;
        if (valueCount < length) {
            return indices;
        }
        // grow
        int newLength = length == 0 ? MIN_NON_EMPTY_SIZE : length + (length >> 1);
        return new int[newLength];
    }

    /**
     * Checks if the {@link #indices} array is too empty and shrinks it if necessary.
     *
     * @return a new, empty array of half the length if that is still at least
     *         {@link #MIN_NON_EMPTY_SIZE} and the values remaining after the removal fill at most a
     *         quarter of the current array, otherwise the current array
     */
    private int[] checkedShrink() {
        int length = indices.length;
        if ((length >> 1) >= MIN_NON_EMPTY_SIZE && (valueCount - 1) <= (length >> 2)) {
            // shrink
            return new int[length >> 1];
        }
        return indices;
    }

    /**
     * Removes the given index from the values array and sets its length.
     *
     * @param index
     *            the index to remove
     * @param length
     *            the desired array length
     */
    abstract void removeValueIndex(int index, int length);

    /**
     * Inserts a new place in the values array at the given index and ensures that the array has the
     * given length.
     *
     * @param index
     *            the index to insert
     * @param length
     *            the desired array length
     */
    abstract void insertValueIndex(int index, int length);

    /**
     * Returns the value stored at the given index.
     *
     * @param index
     *            the index to look up
     * @return the value for the index
     */
    abstract double getValue(int index);

    /**
     * Sets the value at position index of the values array.
     *
     * @param index
     *            the index where to set the value
     * @param value
     *            the value to store
     */
    abstract void setValue(int index, double value);
}