FastExample2SparseTransform.java example

Explorer
ComplexRapidMiner-master
- operator
- src
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.example;

import java.util.Arrays;

import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.SparseDataRow;
import com.rapidminer.operator.UserError;

/**
 * This class can be used for the efficient generation of sparse example
 * formats. The constructor creates a mapping between example set and example
 * table attribute indices which only needs to be created once. The sparse data
 * can then be queried efficiently by using the methods
 * {@link #getNonDefaultAttributeIndices(Example)} and
 * {@link #getNonDefaultAttributeValues(Example)}. Please note that this filter
 * should be reinstantiated for new example sets. Furthermore, a gain in
 * performance is only achieved for examples with underlying
 * {@link SparseDataRow}s.
 *
 * @author Julien Nioche, Ingo Mierswa
 * @version $Id: FastExample2SparseTransform.java,v 2.2 2006/03/21 15:35:39
 *          ingomierswa Exp $
 */
public class FastExample2SparseTransform {

	/**
	 * Marker stored in {@link #mapping} for example table columns which are not
	 * mapped to any attribute position of the example set.
	 */
	private static final int UNMAPPED = -1;

	/**
	 * The mapping between the attribute indices in the data row / example table
	 * on the attribute indices of the given example set. This mapping is only
	 * necessary for examples backed up by {@link SparseDataRow}s. Entries for
	 * table columns without a corresponding attribute are {@link #UNMAPPED}.
	 */
	private final int[] mapping;

	/** This attribute array is necessary since in example sets only iterators are
	 *  provided for attributes. The array position of an attribute is the index
	 *  reported by {@link #getNonDefaultAttributeIndices(Example)}. */
	private final Attribute[] attributes;

	/** The complete array of all attribute indices (0, 1, 2, ...) which can be
	 *  used for data rows which do not implement {@link SparseDataRow}. */
	private final int[] allIndices;


	/**
	 * Creates the table giving the equivalence between the positions of the
	 * Attributes in the ExampleTable and the number of the regular Attributes in
	 * the ExampleSet. A value of -1 indicates that the Attribute is not regular
	 * or has been deleted (is null). This is used in order to optimize the access
	 * to sparse DataRows (e.g. SVM implementations or for Weka), which is important
	 * when the number of Attributes is large.
	 * <p>
	 * As a side effect, {@link DataRow#trim()} is invoked on the data row of
	 * every example of the given example set.
	 *
	 * @param es the example set for which the mapping should be created
	 * @throws UserError (error code 140) if the example set contains an attribute
	 *         whose table index is {@link Attribute#VIEW_ATTRIBUTE_INDEX}
	 */
	public FastExample2SparseTransform(ExampleSet es) throws UserError {
		// init: every table column starts out unmapped
		this.mapping = new int[es.getExampleTable().getNumberOfAttributes()];
		Arrays.fill(this.mapping, UNMAPPED);

		// create mappings
		int numberOfAttributes = es.getAttributes().size();
		this.attributes = new Attribute[numberOfAttributes];
		this.allIndices = new int[numberOfAttributes];
		int pos = 0;
		for (Attribute attribute : es.getAttributes()) {
			int tableIndex = attribute.getTableIndex();
			if (tableIndex == Attribute.VIEW_ATTRIBUTE_INDEX) {
				// view attributes have no column in the example table and
				// hence cannot be mapped
				throw new UserError(null, 140);
			}
			this.mapping[tableIndex] = pos;
			this.attributes[pos] = attribute;
			this.allIndices[pos] = pos;
			pos++;
		}

		// trim is necessary in order to allow fast mapping!
		for (Example e : es) {
			e.getDataRow().trim();
		}
	}

	/**
	 * Returns a list with the indices of the regular Attributes with non-default
	 * values. This can be used for a faster construction of sparse dataset
	 * representations when the number of Attributes is large. The positions of
	 * attributes are sorted by ascending number.
	 * <p>
	 * For examples backed by a {@link SparseDataRow} a newly allocated array is
	 * returned. For all other data rows the internal array of all attribute
	 * indices is returned directly without copying; callers must treat this
	 * array as read-only, since modifying it would corrupt the results of later
	 * calls.
	 *
	 * @param example the example whose non-default attribute positions are requested
	 * @return the ascending attribute positions, usable as indices for
	 *         {@link #getNonDefaultAttributeValues(Example, int[])}
	 */
	public int[] getNonDefaultAttributeIndices(Example example) {
		DataRow data = example.getDataRow();
		if (!(data instanceof SparseDataRow)) {
			// default behaviour for other DataRows
			return allIndices;
		}

		int[] nonDefaultInd = ((SparseDataRow) data).getNonDefaultIndices();
		int[] tempArray = new int[nonDefaultInd.length];
		int numberNonDefaultAttributes = 0;
		// map between the positive indices in the table
		// and the corresponding attribute positions
		for (int tableIndex : nonDefaultInd) {
			int nextPos = mapping[tableIndex];
			if (nextPos != UNMAPPED) {
				tempArray[numberNonDefaultAttributes++] = nextPos;
			}
		}
		// trim the table (unmapped columns were skipped) and sort it
		int[] finalArray = new int[numberNonDefaultAttributes];
		System.arraycopy(tempArray, 0, finalArray, 0, numberNonDefaultAttributes);
		// the positions have to be sorted for the sparse data
		Arrays.sort(finalArray);
		return finalArray;
	}

	/**
	 * Returns an array of non-default values of the given example. These are only
	 * the values of regular attributes. Simply invokes
	 * {@link #getNonDefaultAttributeValues(Example, int[])}
	 * with the array of non-default indices for the given example.
	 *
	 * @param example the example whose values are requested
	 * @return the values at the positions delivered by
	 *         {@link #getNonDefaultAttributeIndices(Example)}, in the same order
	 */
	public double[] getNonDefaultAttributeValues(Example example) {
		return getNonDefaultAttributeValues(example, getNonDefaultAttributeIndices(example));
	}

	/**
	 * Returns an array of non-default values of the given example. These are only
	 * the values of regular attributes. The size of the returned array is the
	 * same as the size of the given indices array.
	 *
	 * @param example the example whose values are requested
	 * @param nonDefaultIndices attribute positions as delivered by
	 *        {@link #getNonDefaultAttributeIndices(Example)}
	 * @return a new array where entry i is the value of the example for the
	 *         attribute at position <code>nonDefaultIndices[i]</code>
	 */
	public double[] getNonDefaultAttributeValues(Example example, int[] nonDefaultIndices) {
		double[] result = new double[nonDefaultIndices.length];
		for (int i = 0; i < result.length; i++) {
			result[i] = example.getValue(this.attributes[nonDefaultIndices[i]]);
		}
		return result;
	}
}