/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.normalization;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDReal;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.parameter.ParameterHandler;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.math.container.Range;
/**
 * A normalization method for bringing the sum of all attribute values to 1.
 * <p>
 * This class adjusts the attribute meta data accordingly and gathers the sum of
 * every numerical attribute; these sums are handed over to a
 * {@link ProportionNormalizationModel}.
 *
 * @author Sebastian Land
 */
public class ProportionNormalizationMethod extends AbstractNormalizationMethod {

    /**
     * Updates the meta data of one attribute to reflect this normalization.
     * <p>
     * If the value set relation of the attribute is {@link SetRelation#EQUAL}, the mean
     * is set to <code>1 / numberOfExamples</code> when the number of examples is known
     * and to an unknown value otherwise. In addition, a warning is registered at the
     * given input port if the lower bound of the value range is negative. For every
     * other value set relation both mean and value range are reset to unknown.
     *
     * @param emd the meta data of the example set, queried for the number of examples
     * @param amd the attribute meta data, modified in place
     * @param exampleSetInputPort the port that receives the warning about negative values
     * @param parameterHandler not used by this method
     * @return a singleton collection holding the modified <code>amd</code>
     */
    @Override
    public Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd, InputPort exampleSetInputPort, ParameterHandler parameterHandler) throws UndefinedParameterError {
        if (amd.getValueSetRelation() != SetRelation.EQUAL) {
            // value set is not exactly known: set mean and range to unknown
            amd.setMean(new MDReal());
            amd.setValueRange(new Range(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), SetRelation.UNKNOWN);
            return Collections.singleton(amd);
        }

        MDReal expectedMean;
        if (emd.getNumberOfExamples().isKnown()) {
            // values that sum up to 1 over n examples have a mean of 1/n
            expectedMean = new MDReal(1d / emd.getNumberOfExamples().getValue());
        } else {
            expectedMean = new MDReal();
        }
        amd.setMean(expectedMean);

        boolean lowerBoundIsNegative = amd.getValueRange().getLower() < 0d;
        if (lowerBoundIsNegative) {
            exampleSetInputPort.addError(new SimpleMetaDataError(Severity.WARNING, exampleSetInputPort, "attribute_contains_negative_values", amd.getName(), getName()));
        }
        return Collections.singleton(amd);
    }

    /**
     * Builds the normalization model from the sums of all numerical attributes.
     *
     * @param exampleSet the data whose numerical attributes are summed up
     * @param operator not used by this method
     * @return a {@link ProportionNormalizationModel} created from the example set and a
     *         map from attribute name to the sum of that attribute's values
     */
    @Override
    public AbstractNormalizationModel getNormalizationModel(ExampleSet exampleSet, Operator operator) {
        double[] sumsByPosition = collectAttributeSums(exampleSet);

        // translate the positional sums into a name based lookup, numerical attributes only
        HashMap<String, Double> sumsByName = new HashMap<String, Double>();
        int position = -1;
        for (Attribute candidate : exampleSet.getAttributes()) {
            position++;
            if (candidate.isNumerical()) {
                sumsByName.put(candidate.getName(), sumsByPosition[position]);
            }
        }
        return new ProportionNormalizationModel(exampleSet, sumsByName);
    }

    /**
     * Sums up the values of every numerical attribute over all examples.
     *
     * @return an array with one slot per attribute in iteration order; slots that
     *         belong to non-numerical attributes stay at 0
     */
    private static double[] collectAttributeSums(ExampleSet exampleSet) {
        Attributes attributes = exampleSet.getAttributes();
        double[] totals = new double[attributes.size()];
        for (Example row : exampleSet) {
            int column = -1;
            for (Attribute attribute : attributes) {
                column++;
                if (attribute.isNumerical()) {
                    totals[column] += row.getValue(attribute);
                }
            }
        }
        return totals;
    }

    /**
     * @return the name of this normalization method, also used in the negative value warning
     */
    @Override
    public String getName() {
        return "proportion transformation";
    }
}