/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example.table; import com.rapidminer.example.Tools; /** * Implementation of DataRow that is backed by primitive arrays. Should always * be used if more than 50% of the data is sparse. As fast (or even faster than * map implementation) but needs considerably less memory. * * @author Niraj Aswani, Julien Nioche, Ingo Mierswa, Shevek * @version $Id: AbstractSparseArrayDataRow.java,v 2.10 2006/03/21 15:35:39 ingomierswa * Exp $ */ public abstract class AbstractSparseArrayDataRow extends DataRow implements SparseDataRow { /** * */ private static final long serialVersionUID = 4946925205115859758L; /** Stores the used attribute indices. */ private int[] x; /** Number of inserted elements. */ private int counter = 0; /** Creates an empty sparse array data row with size 0. */ public AbstractSparseArrayDataRow() { this(0); } /* Note: DataRowFactory calls this with attributes.size, * which is probably wrong if the data row is meant to be * sparse - we never intend to have that many attributes. */ /** Creates a sparse array data row of the given size. */ public AbstractSparseArrayDataRow(int size) { x = new int[size]; for (int i = 0; i < x.length; i++) { x[i] = Integer.MAX_VALUE; } } // ====================== // abstract methods // ====================== /* Implementations of this are not as optimal as the removal * of a value from the indexes array, since the subclass * does not have access to the count variable. This could * be fixed, but ... *shrug*. Shevek. */ protected abstract void removeValue(int index); protected abstract void resizeValues(int length); protected abstract void setValue(int index, double value); protected abstract double getValue(int index); /* This could be implemented using get() and set() */ protected abstract void swapValues(int a, int b); /* This could be implemented using get() */ protected abstract double[] getAllValues(); /** Sorts the arrays in the given range. */ private void sort(int off, int len) { // Insertion sort on smallest arrays if (len < 7) { for (int i = off; i < len + off; i++) for (int j = i; j > off && x[j - 1] > x[j]; j--) swap(j, j - 1); return; } // Choose a partition element, v int m = off + (len >> 1); // Small arrays, middle element if (len > 7) { int l = off; int n = off + len - 1; if (len > 40) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(l, l + s, l + 2 * s); m = med3(m - s, m, m + s); n = med3(n - 2 * s, n - s, n); } m = med3(l, m, n); // Mid-size, med of 3 } long v = x[m]; // Establish Invariant: v* (<v)* (>v)* v* int a = off, b = a, c = off + len - 1, d = c; while (true) { while (b <= c && x[b] <= v) { if (x[b] == v) swap(a++, b); b++; } while (c >= b && x[c] >= v) { if (x[c] == v) swap(c, d--); c--; } if (b > c) break; swap(b++, c--); } // Swap partition elements back to middle int s, n = off + len; s = Math.min(a - off, b - a); vecswap(off, b - s, s); s = Math.min(d - c, n - d - 1); vecswap(b, n - s, s); // Recursively sort non-partition-elements if ((s = b - a) > 1) sort(off, s); if ((s = d - c) > 1) sort(n - s, s); } /** Swaps the next n elements from a and b. */ private void vecswap(int a, int b, int n) { for (int i = 0; i < n; i++, a++, b++) swap(a, b); } private int med3(int a, int b, int c) { return (x[a] < x[b] ? (x[b] < x[c] ? b : x[a] < x[c] ? c : a) : (x[b] > x[c] ? b : x[a] > x[c] ? c : a)); } /** * Swaps x[a] with x[b]. */ protected void swap(int a, int b) { int t = x[a]; x[a] = x[b]; x[b] = t; swapValues(a, b); } /** Returns the desired data for the given attribute. */ protected double get(int val, double defaultValue) { int index = java.util.Arrays.binarySearch(x, val); if (index < 0) { return defaultValue; } else { return getValue(index); } } /** Sets the given data for the given attribute. */ protected void set(int index, double value, double defaultValue) { // first search if it is already available // we need to replace the value // return a negative int if the value is not in the array // the list is ALWAYS sorted int index1 = java.util.Arrays.binarySearch(x, index); if (Tools.isDefault(defaultValue, value)) { if (index1 >= 0) { // (old value != deflt) AND new is default --> // remove entry from arrays System.arraycopy(x, index1 + 1, x, index1, (counter - (index1 + 1))); x[counter - 1] = Integer.MAX_VALUE; removeValue(index1); counter--; } } else { if (index1 < 0) { // a new entry if (counter >= x.length) { // need more space int newlength = x.length + (x.length >> 1) + 1; int[] y = new int[newlength]; System.arraycopy(x, 0, y, 0, x.length); for (int i = x.length; i < y.length; i++) y[i] = Integer.MAX_VALUE; x = y; resizeValues(newlength); } // adds the new value at the end of the array x[counter] = index; setValue(counter, value); // compare to the one before him if ((counter > 0) && (index < x[counter - 1])) { sort(0, x.length); } counter++; } else { // replace existing value setValue(index1, value); } } } public int[] getNonDefaultIndices() { trim(); return this.x; } public double[] getNonDefaultValues() { trim(); return getAllValues(); } /** Does nothing. */ public void ensureNumberOfColumns(int numberOfColumns) {} /** Trims the data row to the number of actually used elements. */ public void trim() { if (counter < x.length) { int[] y = new int[counter]; System.arraycopy(x, 0, y, 0, counter); x = y; resizeValues(counter); } } /** Returns a string representation of the data row. */ public String toString() { StringBuffer result = new StringBuffer(); for (int i = 0; i < x.length; i++) { if (i != 0) result.append(", "); result.append(x[i] + ":" + getValue(i)); } result.append(", counter: " + counter); return result.toString(); } }