/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.util.Arrays;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.SparseDataRow;
import com.rapidminer.operator.UserError;
/**
* This class can be used for the efficient generation of sparse example
* formats. The constructor creates a mapping between example set and example
* table attribute indices, which only needs to be computed once. The sparse
* data can then be queried efficiently by using the methods
* {@link #getNonDefaultAttributeIndices(Example)} and
* {@link #getNonDefaultAttributeValues(Example)}. Please note that this
* transform should be re-instantiated for each new example set. Furthermore,
* a gain in performance is only achieved for examples backed by
* {@link SparseDataRow}s.
*
* @author Julien Nioche, Ingo Mierswa
*/
public class FastExample2SparseTransform {

    /**
     * Lookup table indexed by example table column; each entry holds the
     * position of that column's attribute within the example set, or -1 if the
     * column was not assigned a position by the constructor. Only consulted for
     * examples backed by a {@link SparseDataRow}.
     */
    private int[] mapping;

    /**
     * The attributes of the example set stored by position, because the example
     * set itself only offers iteration over its attributes.
     */
    private Attribute[] attributes;

    /**
     * The positions 0..n-1 of all attributes; returned as the answer for data
     * rows which do not implement {@link SparseDataRow}.
     */
    private int[] allIndices;

    /**
     * Builds the lookup structures for the given example set: a table from
     * example table column index to attribute position (-1 for columns that
     * are not among the iterated attributes), the attributes by position, and
     * the array of all positions. Afterwards every data row of the example set
     * is trimmed.
     *
     * @param es the example set for which the transformation is prepared
     * @throws UserError (code 140) if one of the iterated attributes has the
     *         table index {@link Attribute#VIEW_ATTRIBUTE_INDEX}
     */
    public FastExample2SparseTransform(ExampleSet es) throws UserError {
        // every table column starts out as "not mapped"
        this.mapping = new int[es.getExampleTable().getNumberOfAttributes()];
        Arrays.fill(this.mapping, -1);

        int attributeCount = es.getAttributes().size();
        this.attributes = new Attribute[attributeCount];
        this.allIndices = new int[attributeCount];

        // assign consecutive positions in iteration order
        int position = 0;
        for (Attribute current : es.getAttributes()) {
            int column = current.getTableIndex();
            if (column == Attribute.VIEW_ATTRIBUTE_INDEX) {
                throw new UserError(null, 140);
            }
            this.mapping[column] = position;
            this.attributes[position] = current;
            this.allIndices[position] = position;
            position++;
        }

        // trim is necessary in order to allow fast mapping!
        for (Example current : es) {
            current.getDataRow().trim();
        }
    }

    /**
     * Delivers the positions of the attributes which have non-default values in
     * the given example, sorted in ascending order. For examples backed by a
     * {@link SparseDataRow} a new array is created on each call; for all other
     * data rows the internal array of all positions is returned directly, so
     * callers should not modify the result.
     *
     * @param example the example to inspect
     * @return the ascending attribute positions with non-default values, or all
     *         positions if the data row is not sparse
     */
    public int[] getNonDefaultAttributeIndices(Example example) {
        DataRow row = example.getDataRow();
        if (!(row instanceof SparseDataRow)) {
            // default behaviour for other DataRows
            return allIndices;
        }

        int[] tableColumns = ((SparseDataRow) row).getNonDefaultIndices();
        int[] collected = new int[tableColumns.length];
        int count = 0;
        // translate table columns into attribute positions, dropping unmapped ones
        for (int column : tableColumns) {
            int position = mapping[column];
            if (position != -1) {
                collected[count] = position;
                count++;
            }
        }

        // shrink to the number of positions actually found
        int[] positions = new int[count];
        System.arraycopy(collected, 0, positions, 0, count);
        // sparse representations expect ascending positions
        Arrays.sort(positions);
        return positions;
    }

    /**
     * Delivers the values belonging to the non-default attribute positions of
     * the given example. Convenience variant which first determines the
     * positions via {@link #getNonDefaultAttributeIndices(Example)} and then
     * delegates to {@link #getNonDefaultAttributeValues(Example, int[])}.
     *
     * @param example the example to read from
     * @return the values at the non-default positions
     */
    public double[] getNonDefaultAttributeValues(Example example) {
        int[] positions = getNonDefaultAttributeIndices(example);
        return getNonDefaultAttributeValues(example, positions);
    }

    /**
     * Delivers the values of the given example for the given attribute
     * positions. The result has exactly as many entries as the given index
     * array, in the same order.
     *
     * @param example the example to read from
     * @param nonDefaultIndices the attribute positions to look up
     * @return the value of the example for each of the given positions
     */
    public double[] getNonDefaultAttributeValues(Example example, int[] nonDefaultIndices) {
        int count = nonDefaultIndices.length;
        double[] values = new double[count];
        for (int k = 0; k < count; k++) {
            Attribute attribute = this.attributes[nonDefaultIndices[k]];
            values[k] = example.getValue(attribute);
        }
        return values;
    }
}