/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.example.table.internal;

import java.io.Serializable;
import java.util.Arrays;

import com.rapidminer.example.utils.ExampleSetBuilder.DataManagement;


/**
 * {@link Column} that stores double values in chunks. The chunks can either be sparse or dense and
 * switch automatically to the appropriate format for the given values. For this column,
 * {@link #complete()} must be called when the size is final before {@link #get(int)} or
 * {@link #set(int, double)} is called.
 *
 * @author Gisa Schaefer
 * @since 7.3.1
 */
final class DoubleAutoColumn implements Column {

    private static final long serialVersionUID = 1L;

    /**
     * Building block of a {@link DoubleAutoColumn}.
     *
     * @author Gisa Schaefer
     *
     */
    static abstract class DoubleAutoChunk implements Serializable {

        private static final long serialVersionUID = 1L;

        /**
         * the position of this chunk in {@link DoubleAutoColumn#chunks}
         */
        final int id;

        /**
         * the chunk array {@link DoubleAutoColumn#chunks}. Not final: the owning column re-points
         * this reference whenever it replaces its chunk array by a larger copy, so that it always
         * refers to the array the column currently reads from.
         */
        DoubleAutoChunk[] chunks;

        /**
         * decides about sparsity thresholds
         */
        final DataManagement management;

        /**
         * Creates a chunk that knows its position inside the chunk array of its column.
         *
         * @param id
         *            the position of this chunk in the chunk array
         * @param chunks
         *            the chunk array of the owning column
         * @param management
         *            the data management setting stored for this chunk
         */
        DoubleAutoChunk(int id, DoubleAutoChunk[] chunks, DataManagement management) {
            this.id = id;
            this.chunks = chunks;
            this.management = management;
        }

        /**
         * Sets the value at the specified row to the given value. This assumes that row is the
         * largest index set so far and no value for this row was set before. The necessary space
         * must be allocated by {@link #ensure(int)} beforehand.
         *
         * @param row
         *            the row that should be set
         * @param value
         *            the value that should be set at the row
         */
        abstract void setLast(int row, double value);

        /**
         * Ensures that the internal data structure can hold up to {@code size} values.
         *
         * @param size
         *            the size that should be ensured
         */
        abstract void ensure(int size);

        /**
         * Gets the value at the specified row.
         *
         * @param row
         *            the row that should be looked up
         * @return the value at the specified row
         */
        abstract double get(int row);

        /**
         * Sets the value at the specified row to the given value.
         *
         * @param row
         *            the row that should be set
         * @param value
         *            the value that should be set at the row
         */
        abstract void set(int row, double value);

        /**
         * Signals that no further calls to {@link #ensure(int)} and {@link #setLast(int,double)}
         * will be made. Does nothing by default.
         */
        void complete() {}
    }

    /** the chunks of this column; only the first {@link #chunkCount} entries are populated */
    private DoubleAutoChunk[] chunks = new DoubleAutoChunk[1];

    /** the number of chunks created so far */
    private int chunkCount = 0;

    /** the largest number of rows for which space was ensured so far */
    private int ensuredSize = 0;

    private final DataManagement management;

    /**
     * Constructs a column with enough chunks to fit size values.
     *
     * @param size
     *            the size of the column
     * @param management
     *            the data management setting; {@link DataManagement#MEMORY_OPTIMIZED} makes new
     *            chunks start in sparse format, every other value makes them start dense
     */
    DoubleAutoColumn(int size, DataManagement management) {
        this.management = management;
        ensure(size);
    }

    /**
     * {@inheritDoc}
     *
     * Must not be called before {@link #complete()} was called.
     */
    @Override
    public double get(int row) {
        // the high bits of the row select the chunk, the low bits the position inside the chunk
        return chunks[row >> AutoColumnUtils.CHUNK_SIZE_EXP].get(row & AutoColumnUtils.CHUNK_MODULO_MASK);
    }

    /**
     * {@inheritDoc}
     *
     * Must not be called before {@link #complete()} was called.
     */
    @Override
    public void set(int row, double value) {
        chunks[row >> AutoColumnUtils.CHUNK_SIZE_EXP].set(row & AutoColumnUtils.CHUNK_MODULO_MASK, value);
    }

    @Override
    public void setLast(int row, double value) {
        chunks[row >> AutoColumnUtils.CHUNK_SIZE_EXP].setLast(row & AutoColumnUtils.CHUNK_MODULO_MASK, value);
    }

    /**
     * Ensures that this column can hold {@code size} values by enlarging a partially sized last
     * chunk and appending new chunks as needed. Requests that do not exceed the size ensured so
     * far are ignored.
     *
     * @param size
     *            the number of rows that should fit into the column
     */
    @Override
    public void ensure(int size) {
        if (size <= ensuredSize) {
            // Already covered. Letting ensuredSize shrink would corrupt the bookkeeping below: a
            // partially sized last chunk could afterwards be mistaken for a complete one and
            // would then never be enlarged.
            return;
        }
        ensureChunks(size);

        int completeChunks = 0;
        boolean enlargeLastChunk = false;
        if (chunkCount > 0) {
            if (ensuredSize % AutoColumnUtils.CHUNK_SIZE > 0) {
                // the last chunk was ensured for less than a full chunk and must grow first
                completeChunks = chunkCount - 1;
                enlargeLastChunk = true;
            } else {
                completeChunks = chunkCount;
            }
        }

        // rows that are not covered by chunks already ensured to full chunk size
        int rowsLeft = size - completeChunks * AutoColumnUtils.CHUNK_SIZE;

        while (rowsLeft > 0) {
            int chunkSize = Math.min(rowsLeft, AutoColumnUtils.CHUNK_SIZE);
            if (enlargeLastChunk) {
                chunks[chunkCount - 1].ensure(chunkSize);
                enlargeLastChunk = false;
            } else {
                if (management == DataManagement.MEMORY_OPTIMIZED) {
                    // create sparse chunk with guessed default value 0
                    DoubleAutoSparseChunk sparse = new DoubleAutoSparseChunk(chunks, chunkCount, 0, management);
                    sparse.hasGuessedDefault();
                    sparse.ensure(chunkSize);
                    chunks[chunkCount] = sparse;
                } else {
                    chunks[chunkCount] = new DoubleAutoDenseChunk(chunks, chunkCount, chunkSize, management);
                }
                chunkCount++;
            }
            rowsLeft -= chunkSize;
        }
        ensuredSize = size;
    }

    /**
     * Ensures that the chunks array is big enough to hold all chunks. When the array has to be
     * replaced by a larger copy, all existing chunks are re-pointed to the new array.
     *
     * @param numberOfRows
     *            the number of rows that should be stored in the chunks
     */
    private void ensureChunks(int numberOfRows) {
        int chunksNeeded = numberOfRows / AutoColumnUtils.CHUNK_SIZE + 1;
        if (chunksNeeded <= chunks.length) {
            return;
        }
        // grow by at least 50% (from 1 to 2) but never beyond the maximal number of chunks
        int chunksMinGrowth = chunks.length == 1 ? 2 : chunks.length + (chunks.length >> 1);
        int newLength = Math.min(AutoColumnUtils.MAXIMAL_CHUNKS, Math.max(chunksNeeded, chunksMinGrowth));
        DoubleAutoChunk[] grown = Arrays.copyOf(chunks, newLength);
        // Arrays.copyOf returns a new array, so existing chunks would otherwise keep referring to
        // the discarded one and any write through their reference would be lost for this column.
        for (int i = 0; i < chunkCount; i++) {
            grown[i].chunks = grown;
        }
        chunks = grown;
    }

    /**
     * Forwards the completion signal to every chunk created so far.
     */
    @Override
    public void complete() {
        for (int i = 0; i < chunkCount; i++) {
            chunks[i].complete();
        }
    }

}