/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Attributes; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.SimpleAttributes; import com.rapidminer.example.Statistics; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.ViewAttribute; import com.rapidminer.operator.OperatorException; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.RandomGenerator; /** * @author Sebastian Land */ public class NoiseModel extends PreprocessingModel { private static final long serialVersionUID = -1953073746280248791L; // settings private double attributeNoise; private double labelNoise; private String[] noiseAttributeNames; private double noiseOffset; private double noiseFactor; // data needed during viewing private Attribute viewLabelParent; private Attribute viewLabel; private Set<Attribute> noiseAttributes = new HashSet<Attribute>(); private RandomGenerator random; private Map<String, Double> noiseMap; private double labelRange; public NoiseModel(ExampleSet exampleSet, RandomGenerator localRandom, List<String[]> noises, double attributeNoise, double labelNoise, double noiseOffsett, double noiseFactor, String[] attributeNames) { super(exampleSet); this.attributeNoise = attributeNoise; this.labelNoise = labelNoise; this.noiseOffset = noiseOffsett; this.noiseFactor = noiseFactor; this.noiseAttributeNames = attributeNames; this.random = localRandom; // read noise values from list noiseMap = new HashMap<String, Double>(); Iterator<String[]> i = noises.iterator(); while (i.hasNext()) { String[] pair = i.next(); noiseMap.put(pair[0], Double.valueOf(pair[1])); } Attribute label = exampleSet.getAttributes().getLabel(); if (label != null) { exampleSet.recalculateAttributeStatistics(label); double min = exampleSet.getStatistics(label, Statistics.MINIMUM); double max = exampleSet.getStatistics(label, Statistics.MAXIMUM); labelRange = Math.abs(max - min); } } @Override public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException { // add noise to existing attributes Iterator<Example> reader = exampleSet.iterator(); Attribute label = exampleSet.getAttributes().getLabel(); while (reader.hasNext()) { Example example = reader.next(); // attribute noise for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.isNumerical()) { Double noiseObject = noiseMap.get(attribute.getName()); double noise = noiseObject == null ? attributeNoise : noiseObject.doubleValue(); double noiseValue = random.nextGaussian() * noise; example.setValue(attribute, example.getValue(attribute) + noiseValue); } } // label noise if (label != null) { if (label.isNumerical()) { double noiseValue = random.nextGaussian() * labelNoise * labelRange; example.setValue(label, example.getValue(label) + noiseValue); } else if (label.isNominal() && (label.getMapping().size() >= 2)) { if (random.nextDouble() < labelNoise) { int oldValue = (int) example.getValue(label); int newValue = random.nextInt(label.getMapping().size() - 1); if (newValue >= oldValue) newValue++; example.setValue(label, newValue); } } } } // add new noise attributes List<Attribute> newAttributes = new LinkedList<Attribute>(); for (String name: noiseAttributeNames) { Attribute newAttribute = AttributeFactory.createAttribute(name, Ontology.REAL); newAttributes.add(newAttribute); exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); } for (Example example: exampleSet) { for (Attribute attribute: newAttributes) { example.setValue(attribute, noiseOffset + noiseFactor * random.nextGaussian()); } } return exampleSet; } @Override public Attributes getTargetAttributes(ExampleSet parentSet) { SimpleAttributes attributes = new SimpleAttributes(); // add special attributes to new attributes Iterator<AttributeRole> specialRoles = parentSet.getAttributes().specialAttributes(); while (specialRoles.hasNext()) { AttributeRole role = specialRoles.next(); if (role.getSpecialName().equals(Attributes.LABEL_NAME) && labelNoise != 0d) { AttributeRole clonedRole = (AttributeRole) role.clone(); viewLabelParent = role.getAttribute(); viewLabel = new ViewAttribute(this, viewLabelParent, viewLabelParent.getName(), viewLabelParent.getValueType(), (viewLabelParent.isNominal()) ? viewLabelParent.getMapping() : null); clonedRole.setAttribute(viewLabel); attributes.add(clonedRole); } else { attributes.add(specialRoles.next()); } } // add regular attributes Iterator<AttributeRole> i = parentSet.getAttributes().allAttributeRoles(); while (i.hasNext()) { AttributeRole attributeRole = i.next(); if (!attributeRole.isSpecial()) { Attribute attribute = attributeRole.getAttribute(); if (attribute.isNumerical()) { attributes.addRegular(new ViewAttribute(this, attribute, attribute.getName(), Ontology.REAL, null)); } else { attributes.add(attributeRole); } } } // add new noise attributes for (String name: noiseAttributeNames) { Attribute viewAttribute = new ViewAttribute(this, null, name, Ontology.REAL, null); attributes.addRegular(viewAttribute); noiseAttributes.add(viewAttribute); } return attributes; } @Override public double getValue(Attribute targetAttribute, double value) { if (targetAttribute == viewLabel) { // label noise if (viewLabel.isNumerical()) { double min = getTrainingHeader().getStatistics(viewLabelParent, Statistics.MINIMUM); double max = getTrainingHeader().getStatistics(viewLabelParent, Statistics.MAXIMUM); double labelRange = Math.abs(max - min); return value + random.nextGaussian() * labelNoise * labelRange; } else if (viewLabel.isNominal() && (viewLabel.getMapping().size() >= 2)) { if (random.nextDouble() < labelNoise) { int oldValue = (int) value; int newValue = oldValue; while (newValue == oldValue) { newValue = random.nextInt(viewLabel.getMapping().size()); } return newValue; } } } else if (noiseAttributes.contains(targetAttribute)) { return noiseOffset + noiseFactor * random.nextGaussian(); } else { // attributeNoise Double noiseObject = noiseMap.get(targetAttribute.getName()); double noise = noiseObject == null ? attributeNoise : noiseObject.doubleValue(); double noiseValue = random.nextGaussian() * noise; return value + noiseValue; } return 0; } @Override public boolean isSupportingAttributeRoles() { return true; } }