/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.discretization;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Map.Entry;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.SimpleAttributes;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.NominalMapping;
import com.rapidminer.example.table.PolynominalMapping;
import com.rapidminer.example.table.ViewAttribute;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.Tupel;
/**
 * The generic discretization model. It stores, per attribute name, a sorted set of
 * (upper bound, range name) pairs and uses them to turn numerical values into
 * nominal range indices.
 *
 * <p>A value is assigned to the first range (in the sorted set's iteration order)
 * whose upper bound is greater than or equal to the value according to
 * {@link Tools#isLessEqual(double, double)}. Missing values and values that lie
 * above every bound are mapped to a missing value ({@code Double.NaN}).</p>
 *
 * @author Sebastian Land
 * @version $Id: DiscretizationModel.java,v 1.14 2008/07/01 13:20:38 stiefelolm Exp $
 */
public class DiscretizationModel extends PreprocessingModel {

    private static final long serialVersionUID = -8732346419946567062L;

    /** Maps attribute names to their ranges; stays {@code null} until one of the {@code setRanges} methods is called. */
    private HashMap<String, SortedSet<Tupel<Double, String>>> rangesMap;

    /** Names of the numerical attributes of the example set the model was created on. */
    private final Set<String> attributeNames;

    /** If set, {@link #applyOnData(ExampleSet)} drops nominal attributes with fewer than two values. */
    private final boolean removeUseless;

    /**
     * Creates a model which removes useless nominal attributes when applied.
     *
     * @param exampleSet the example set whose numerical attribute names are remembered
     */
    protected DiscretizationModel(ExampleSet exampleSet) {
        this(exampleSet, true);
    }

    /**
     * Creates a model and remembers the names of all numerical regular attributes
     * of the given example set.
     *
     * @param exampleSet    the example set whose numerical attribute names are remembered
     * @param removeUseless whether nominal attributes with fewer than two values
     *                      are removed in {@link #applyOnData(ExampleSet)}
     */
    protected DiscretizationModel(ExampleSet exampleSet, boolean removeUseless) {
        super(exampleSet);
        attributeNames = new HashSet<String>();
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attribute.isNumerical()) {
                attributeNames.add(attribute.getName());
            }
        }
        this.removeUseless = removeUseless;
    }

    /**
     * Replaces every numerical attribute for which ranges are known by a new nominal
     * attribute of the same name holding the range index of each value.
     *
     * <p>Numerical attributes without ranges are left untouched, matching the
     * pass-through behavior of {@link #getValue(Attribute, double)}. Values that do not
     * fall into any range, as well as missing values, become missing values in the
     * new attribute instead of keeping whatever default the new column was created with.</p>
     *
     * @param exampleSet the example set to discretize; it is modified and returned
     * @return the given example set after the replacement
     * @throws OperatorException declared by the signature; not thrown directly by this code
     */
    public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
        // pair each numerical attribute that actually has ranges with a new nominal attribute
        Map<Attribute, Attribute> replacementMap = new LinkedHashMap<Attribute, Attribute>();
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attribute.isNumerical() && getRanges(attribute.getName()) != null) {
                Attribute newAttribute = AttributeFactory.createAttribute(attribute.getName(), Ontology.NOMINAL);
                replacementMap.put(attribute, newAttribute);
            }
        }
        Set<Entry<Attribute, Attribute>> replacements = replacementMap.entrySet();

        // register the new attributes and fill their mappings; this happens in a second
        // pass because the attribute collection must not grow while it is being iterated
        ExampleTable table = exampleSet.getExampleTable();
        for (Entry<Attribute, Attribute> replacement : replacements) {
            Attribute newAttribute = replacement.getValue();
            table.addAttribute(newAttribute);
            exampleSet.getAttributes().addRegular(newAttribute);
            SortedSet<Tupel<Double, String>> ranges = getRanges(replacement.getKey().getName());
            if (ranges != null) {
                // mapping order equals range order, so mapping index == range index
                for (Tupel<Double, String> rangePair : ranges) {
                    newAttribute.getMapping().mapString(rangePair.getSecond());
                }
            }
        }

        // copy the data: write the range index, or a missing value if no range matches
        for (Example example : exampleSet) {
            for (Entry<Attribute, Attribute> replacement : replacements) {
                Attribute originalAttribute = replacement.getKey();
                int rangeIndex = findRangeIndex(getRanges(originalAttribute.getName()), example.getValue(originalAttribute));
                example.setValue(replacement.getValue(), rangeIndex >= 0 ? rangeIndex : Double.NaN);
            }
        }

        // remove the replaced numerical attributes
        for (Attribute originalAttribute : replacementMap.keySet()) {
            exampleSet.getAttributes().remove(originalAttribute);
        }

        // remove nominal attributes which carry no information (fewer than two values)
        if (removeUseless) {
            Iterator<Attribute> iterator = exampleSet.getAttributes().iterator();
            while (iterator.hasNext()) {
                Attribute attribute = iterator.next();
                if (attribute.isNominal() && attribute.getMapping().size() < 2) {
                    iterator.remove();
                }
            }
        }
        return exampleSet;
    }

    /**
     * Builds the ranges from arrays of upper bounds and generates a name for each range.
     *
     * <p>Range names are {@code rangeName} followed by a counter starting at 1 for each
     * attribute. With {@code longRangeNames} the limits are appended in the form
     * {@code " [lower - upper]"}, where the lower limit of the first range is negative
     * infinity and all limits are formatted by {@link Tools#formatIntegerIfPossible(double)}.</p>
     *
     * @param rangesMap      upper bounds per attribute, in the order in which the ranges should be numbered
     * @param rangeName      prefix of the generated range names
     * @param longRangeNames whether the limits are appended to the range names
     */
    public void setRanges(HashMap<Attribute, double[]> rangesMap, String rangeName, boolean longRangeNames) {
        this.rangesMap = new HashMap<String, SortedSet<Tupel<Double, String>>>();
        for (Map.Entry<Attribute, double[]> entry : rangesMap.entrySet()) {
            TreeSet<Tupel<Double, String>> ranges = new TreeSet<Tupel<Double, String>>();
            int counter = 1;
            String lastLimit = Tools.formatIntegerIfPossible(Double.NEGATIVE_INFINITY);
            for (double rangeValue : entry.getValue()) {
                String newLimit = Tools.formatIntegerIfPossible(rangeValue);
                String usedRangeName = rangeName + counter;
                if (longRangeNames) {
                    usedRangeName += " [" + lastLimit + " - " + newLimit + "]";
                }
                ranges.add(new Tupel<Double, String>(rangeValue, usedRangeName));
                counter++;
                lastLimit = newLimit;
            }
            this.rangesMap.put(entry.getKey().getName(), ranges);
        }
    }

    /**
     * Uses the given ranges directly. The map is stored as is, not copied.
     *
     * @param rangesMap the (upper bound, range name) pairs per attribute name
     */
    public void setRanges(HashMap<String, SortedSet<Tupel<Double, String>>> rangesMap) {
        this.rangesMap = rangesMap;
    }

    /**
     * Lists, for each attribute with ranges, the attribute name followed by a chain of
     * the form {@code -Infinity <= name1 <= bound1 <= name2 <= bound2 ...}.
     *
     * @return the textual description, or an empty string if no ranges have been set yet
     */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        if (rangesMap == null) {
            // ranges not set yet: nothing to describe
            return buffer.toString();
        }
        for (Map.Entry<String, SortedSet<Tupel<Double, String>>> entry : rangesMap.entrySet()) {
            buffer.append(Tools.getLineSeparator());
            buffer.append(Tools.getLineSeparator());
            buffer.append(entry.getKey());
            buffer.append(Tools.getLineSeparator());
            buffer.append(Double.NEGATIVE_INFINITY).append(" <= ");
            boolean first = true;
            for (Tupel<Double, String> tupel : entry.getValue()) {
                if (first) {
                    first = false;
                } else {
                    buffer.append(" <= ");
                }
                buffer.append(tupel.getSecond()).append(" <= ").append(tupel.getFirst());
            }
        }
        return buffer.toString();
    }

    /**
     * Creates the attributes of a view on the given example set: special attributes and
     * attributes this model does not discretize are taken over unchanged, the remaining
     * numerical attributes are represented by polynominal {@link ViewAttribute}s whose
     * mapping contains the range names.
     *
     * @param parentSet the example set the view is based on
     * @return the attributes of the view
     */
    public Attributes getTargetAttributes(ExampleSet parentSet) {
        SimpleAttributes attributes = new SimpleAttributes();

        // add special attributes to new attributes
        Iterator<AttributeRole> specialRoles = parentSet.getAttributes().specialAttributes();
        while (specialRoles.hasNext()) {
            attributes.add(specialRoles.next());
        }

        // add regular attributes
        for (Attribute attribute : parentSet.getAttributes()) {
            SortedSet<Tupel<Double, String>> ranges = null;
            if (attribute.isNumerical() && attributeNames.contains(attribute.getName())) {
                ranges = getRanges(attribute.getName());
            }
            if (ranges == null) {
                // not discretized by this model (or no ranges known): keep the attribute as it is
                attributes.addRegular(attribute);
            } else if (ranges.size() > 1) {
                // NOTE(review): attributes with fewer than two ranges are dropped here even if
                // removeUseless is false, unlike in applyOnData - confirm whether this is intended.
                NominalMapping mapping = new PolynominalMapping();
                for (Tupel<Double, String> rangePair : ranges) {
                    mapping.mapString(rangePair.getSecond());
                }
                // giving new attributes old name: connection to rangesMap
                attributes.addRegular(new ViewAttribute(this, attribute, attribute.getName(), Ontology.POLYNOMINAL, mapping));
            }
        }
        return attributes;
    }

    /**
     * Translates a numerical value of the given attribute into its range index.
     *
     * @param targetAttribute the attribute whose name identifies the ranges to use
     * @param value           the numerical value to discretize
     * @return the index of the matching range, {@code Double.NaN} if the value is missing
     *         or matches no range, or the unchanged value if no ranges are known for
     *         the attribute
     */
    public double getValue(Attribute targetAttribute, double value) {
        SortedSet<Tupel<Double, String>> ranges = getRanges(targetAttribute.getName());
        if (ranges == null) {
            return value;
        }
        int rangeIndex = findRangeIndex(ranges, value);
        return rangeIndex >= 0 ? rangeIndex : Double.NaN;
    }

    /** Returns the ranges stored for the given attribute name, or {@code null} if there are none or no ranges were set yet. */
    private SortedSet<Tupel<Double, String>> getRanges(String attributeName) {
        return rangesMap == null ? null : rangesMap.get(attributeName);
    }

    /** Returns the position of the first range whose upper bound is not below the value, or -1 for missing values, missing ranges or no match. */
    private static int findRangeIndex(SortedSet<Tupel<Double, String>> ranges, double value) {
        if (ranges == null || Double.isNaN(value)) {
            return -1;
        }
        int index = 0;
        for (Tupel<Double, String> rangePair : ranges) {
            if (Tools.isLessEqual(value, rangePair.getFirst().doubleValue())) {
                return index;
            }
            index++;
        }
        return -1;
    }
}