/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.datatable;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import com.rapidminer.tools.Tools;

/**
 * A simple data table implementation which stores the data itself.
 *
 * <p>Nominal values are stored as integer indices. For every column that has
 * received at least one value through {@link #mapString(int, String)} the table
 * keeps a pair of mappings (index to string and string to index); a column is
 * regarded as nominal exactly when such a mapping exists.</p>
 *
 * <p>Locking as implemented here: {@link #add(DataTableRow)},
 * {@link #remove(DataTableRow)} and {@link #clear()} lock both this table and
 * the internal row list; {@link #getNumberOfRows()} locks the row list.
 * {@link #getRow(int)} and iterators returned by {@link #iterator()} are not
 * protected against concurrent modification.</p>
 *
 * @author Ingo Mierswa, Simon Fischer
 */
public class SimpleDataTable extends AbstractDataTable implements Serializable {

    private static final long serialVersionUID = 4459570725439894361L;

    /** The rows of this table; also used as the lock object for row access. */
    private final List<DataTableRow> data = new ArrayList<DataTableRow>();

    /** The column names. */
    private final String[] columns;

    /** The column weights, or <code>null</code> if no weights were given. */
    private final double[] weights;

    /** Flags marking the columns which were declared special. */
    private final boolean[] specialColumns;

    /** Per column: mapping from value index to nominal string. */
    private final Map<Integer, Map<Integer,String>> index2StringMap = new HashMap<Integer,Map<Integer,String>>();

    /** Per column: mapping from nominal string to value index. */
    private final Map<Integer, Map<String,Integer>> string2IndexMap = new HashMap<Integer,Map<String,Integer>>();

    /** Per column: the index which will be assigned to the next new nominal value. */
    private final int[] currentIndices;

    /**
     * Creates an empty table without column weights.
     *
     * @param name    the name of the table
     * @param columns the column names, must not be <code>null</code>
     */
    public SimpleDataTable(String name, String[] columns) {
        this(name, columns, null);
    }

    /**
     * Creates an empty table.
     *
     * @param name    the name of the table
     * @param columns the column names, must not be <code>null</code>; the array is
     *                stored as is (not copied)
     * @param weights the column weights or <code>null</code> if weights are not
     *                supported; the array is stored as is (not copied)
     */
    public SimpleDataTable(String name, String[] columns, double[] weights) {
        super(name);
        this.columns = columns;
        this.weights = weights;
        // freshly allocated arrays are already filled with false / 0
        this.specialColumns = new boolean[columns.length];
        this.currentIndices = new int[columns.length];
    }

    /**
     * Copies the meta data (name, columns, weights, special flags and nominal
     * mappings) of the given table. The rows are NOT copied, i.e. the new table
     * is empty. Used by {@link #sample(int)}.
     */
    private SimpleDataTable(SimpleDataTable simpleDataTable) {
        super(simpleDataTable.getName());
        this.columns = (simpleDataTable.columns == null) ? null : simpleDataTable.columns.clone();
        this.weights = (simpleDataTable.weights == null) ? null : simpleDataTable.weights.clone();
        this.specialColumns = (simpleDataTable.specialColumns == null) ? null : simpleDataTable.specialColumns.clone();
        this.currentIndices = simpleDataTable.currentIndices.clone();

        // copy the nested maps so that both tables can extend their mappings independently
        for (Map.Entry<Integer, Map<Integer,String>> entry : simpleDataTable.index2StringMap.entrySet()) {
            this.index2StringMap.put(entry.getKey(), new HashMap<Integer,String>(entry.getValue()));
        }
        for (Map.Entry<Integer, Map<String,Integer>> entry : simpleDataTable.string2IndexMap.entrySet()) {
            this.string2IndexMap.put(entry.getKey(), new HashMap<String,Integer>(entry.getValue()));
        }
    }

    /** Returns the number of columns which are currently marked as special. */
    public int getNumberOfSpecialColumns() {
        int counter = 0;
        for (boolean special : specialColumns) {
            if (special) {
                counter++;
            }
        }
        return counter;
    }

    /** Returns true if the column with the given index is marked as special. */
    public boolean isSpecial(int index) {
        return specialColumns[index];
    }

    /** Marks the column with the given index as special or not special. */
    public void setSpecial(int index, boolean special) {
        this.specialColumns[index] = special;
    }

    /** Returns true if a nominal mapping exists for the given column. */
    public boolean isNominal(int column) {
        return index2StringMap.get(column) != null;
    }

    /** Always returns false. */
    public boolean isDate(int index) {
        return false;
    }

    /** Always returns false. */
    public boolean isTime(int index) {
        return false;
    }

    /** Always returns false. */
    public boolean isDateTime(int index) {
        return false;
    }

    /** Returns true if the given column is not nominal. */
    public boolean isNumerical(int index) {
        return !isNominal(index);
    }

    /**
     * Returns the nominal string which is mapped to the given value index.
     *
     * @param column the column index
     * @param index  the value index as returned by {@link #mapString(int, String)}
     * @return the mapped string, or <code>null</code> if the column has no nominal
     *         mapping or the index is unknown for this column
     */
    public String mapIndex(int column, int index) {
        Map<Integer,String> columnIndexMap = index2StringMap.get(column);
        if (columnIndexMap == null) {
            // no mapping registered for this column
            return null;
        }
        return columnIndexMap.get(index);
    }

    /**
     * Returns the value index for the given nominal string in the given column.
     * If the string is not yet known for this column, the next free index of
     * the column is assigned to it and both mappings are updated.
     *
     * @param column the column index
     * @param value  the nominal value
     * @return the index which is mapped to the given value
     */
    public int mapString(int column, String value) {
        Map<String,Integer> columnValueMap = string2IndexMap.get(column);
        if (columnValueMap != null) {
            Integer knownIndex = columnValueMap.get(value);
            if (knownIndex != null) {
                return knownIndex.intValue();
            }
        }

        // read the next free index first so that an illegal column index fails
        // before any mapping is registered
        int newIndex = currentIndices[column];

        Map<Integer,String> columnIndexMap;
        if (columnValueMap == null) {
            // first nominal value of this column: create both directions
            columnValueMap = new HashMap<String,Integer>();
            columnIndexMap = new HashMap<Integer,String>();
            string2IndexMap.put(column, columnValueMap);
            index2StringMap.put(column, columnIndexMap);
        } else {
            columnIndexMap = index2StringMap.get(column);
        }

        columnValueMap.put(value, newIndex);
        columnIndexMap.put(newIndex, value);
        currentIndices[column] = newIndex + 1;
        return newIndex;
    }

    /**
     * Returns the number of nominal values currently mapped for the given
     * column, or 0 if the column has no nominal mapping.
     */
    public int getNumberOfValues(int column) {
        Map<Integer,String> columnIndexMap = index2StringMap.get(column);
        return (columnIndexMap == null) ? 0 : columnIndexMap.size();
    }

    /**
     * Removes all nominal values from the mapping tables which are not used by
     * any row of this table. The indices of the remaining values are kept.
     */
    public void cleanMappingTables() {
        // start with all mapped values of each nominal column ...
        Map<Integer, Set<String>> unusedValues = new HashMap<Integer, Set<String>>();
        for (Map.Entry<Integer, Map<String,Integer>> entry : this.string2IndexMap.entrySet()) {
            unusedValues.put(entry.getKey(), new HashSet<String>(entry.getValue().keySet()));
        }

        // ... and drop every value which actually occurs in a row
        int numberOfColumns = getNumberOfColumns();
        for (DataTableRow row : this) {
            for (int i = 0; i < numberOfColumns; i++) {
                if (isNominal(i)) {
                    unusedValues.get(i).remove(getValueAsString(row, i));
                }
            }
        }

        // whatever is left was never seen and is removed from both directions
        for (int i = 0; i < numberOfColumns; i++) {
            Set<String> toDelete = unusedValues.get(i);
            if (toDelete != null) {
                Map<String, Integer> string2Index = this.string2IndexMap.get(i);
                Map<Integer, String> index2String = this.index2StringMap.get(i);
                for (String current : toDelete) {
                    Integer oldIndex = string2Index.remove(current);
                    if (oldIndex != null) {
                        index2String.remove(oldIndex);
                    }
                }
            }
        }
    }

    /** Returns true if column weights were given at construction time. */
    public boolean isSupportingColumnWeights() {
        return weights != null;
    }

    /**
     * Returns the weight of the given column or {@link Double#NaN} if this
     * table does not support column weights.
     */
    public double getColumnWeight(int column) {
        return (weights == null) ? Double.NaN : weights[column];
    }

    /** Returns the name of the column with the given index. */
    public String getColumnName(int i) {
        return columns[i];
    }

    /**
     * Returns the index of the first column with the given name.
     *
     * @param name the column name to look for
     * @return the column index, or -1 if no column has this name (this includes
     *         the case that <code>name</code> is <code>null</code>)
     */
    public int getColumnIndex(String name) {
        if (name == null) {
            return -1;
        }
        for (int i = 0; i < columns.length; i++) {
            // called on the non-null argument so that null column names cannot fail
            if (name.equals(columns[i])) {
                return i;
            }
        }
        return -1;
    }

    /** Returns the number of columns. */
    public int getNumberOfColumns() {
        return columns.length;
    }

    /** Returns the internal array of column names (no copy is made). */
    @Override
    public String[] getColumnNames() {
        return columns;
    }

    /** Appends the given row and calls the inherited <code>fireEvent()</code>. */
    public synchronized void add(DataTableRow row) {
        synchronized (data) {
            data.add(row);
            fireEvent();
        }
    }

    /**
     * Removes the given row and calls the inherited <code>fireEvent()</code>,
     * whether or not the row was part of this table.
     */
    public synchronized void remove(DataTableRow row) {
        synchronized (data) {
            data.remove(row);
            fireEvent();
        }
    }

    /** Returns the row with the given index. */
    public DataTableRow getRow(int index) {
        return data.get(index);
    }

    /**
     * Returns an iterator over all rows. Only the creation of the iterator is
     * guarded by the row list lock, the iteration itself is not.
     */
    public synchronized Iterator<DataTableRow> iterator() {
        synchronized (data) {
            return data.iterator();
        }
    }

    /** Returns the number of rows. */
    public int getNumberOfRows() {
        synchronized (data) {
            return data.size();
        }
    }

    /**
     * Removes all rows and calls the inherited <code>fireEvent()</code>. Uses
     * the same locks as {@link #add(DataTableRow)} and
     * {@link #remove(DataTableRow)}.
     */
    public synchronized void clear() {
        synchronized (data) {
            data.clear();
            fireEvent();
        }
    }

    /**
     * Returns a random sample of this table.
     *
     * @param newSize the desired number of rows
     * @return this table itself if it does not have more than
     *         <code>newSize</code> rows; otherwise a new table with the same meta
     *         data and <code>newSize</code> rows drawn uniformly without
     *         replacement (the row objects are shared with this table)
     */
    public synchronized DataTable sample(int newSize) {
        int numberOfRows = getNumberOfRows();
        if (numberOfRows <= newSize) {
            return this;
        }

        SimpleDataTable result = new SimpleDataTable(this);

        // must be a usual random since otherwise plotting would change the rest of
        // the process during a breakpoint result viewing
        Random random = new Random();

        // partial Fisher-Yates shuffle: after step k the first k entries hold a
        // uniformly drawn selection of k distinct row indices
        int[] indices = new int[numberOfRows];
        for (int i = 0; i < numberOfRows; i++) {
            indices[i] = i;
        }
        for (int picked = 0; picked < newSize; picked++) {
            int swapWith = picked + random.nextInt(numberOfRows - picked);
            int chosen = indices[swapWith];
            indices[swapWith] = indices[picked];
            indices[picked] = chosen;
            result.add(data.get(chosen));
        }
        return result;
    }

    /** Dumps the complete table into a string (complete data!). */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        int numberOfColumns = getNumberOfColumns();
        for (DataTableRow row : this) {
            for (int i = 0; i < numberOfColumns; i++) {
                if (i != 0) {
                    result.append(", ");
                }
                result.append(row.getValue(i));
            }
            result.append(Tools.getLineSeparator());
        }
        return result.toString();
    }
}