/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.generator;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.math.BinaryPeakFinder;
import com.rapidminer.tools.math.Complex;
import com.rapidminer.tools.math.FastFourierTransform;
import com.rapidminer.tools.math.Peak;
import com.rapidminer.tools.math.PeakFinder;
import com.rapidminer.tools.math.SpectrumFilter;
import com.rapidminer.tools.math.WindowFunction;
/**
* Factory class to produce new attributes based on the Fourier synthesis of the label mapped onto
* an attribute dimension.
*
* @author Ingo Mierswa
*/
public class SinusFactory {

    /**
     * Minimum evidence factor: a peak is only used if its evidence is strictly greater than this
     * fraction of the reference (maximal) evidence.
     */
    private static final double MIN_EVIDENCE = 0.2d;

    /** Names of the frequency adaption types; the index of a name equals its type constant. */
    public static final String[] ADAPTION_TYPES = { "uniformly", "uniformly_without_nu", "gaussian" };

    /** Adaption type: evenly spaced frequencies in <code>frequency +/- epsilon * frequency</code>. */
    public static final int UNIFORMLY = 0;

    /** Adaption type: evenly spaced frequencies in <code>frequency +/- epsilon</code>. */
    public static final int UNIFORMLY_WITHOUT_NU = 1;

    /** Adaption type: gaussian distributed around the frequency, standard deviation epsilon. */
    public static final int GAUSSIAN = 2;

    /**
     * The number of attributes generated for each peak. Several attributes are necessary because
     * the FT does not deliver the correct frequency (aliasing, leakage) in all cases. In later
     * releases this should be replaced by a gradient search or an evolutionary search for the
     * correct value.
     */
    private int attributesPerPeak = 3;

    /**
     * Width of the range around a peak frequency in which the adapted frequencies are placed.
     * Depending on the adaption type it is used relative to the frequency
     * (<code>epsilon * frequency</code>), as an absolute offset, or as the standard deviation of a
     * gaussian distribution.
     */
    private double epsilon = 0.1;

    /** Indicates the type of frequency adaption (one of the adaption type constants). */
    private int adaptionType = UNIFORMLY;

    /**
     * The maximal number of peaks used for each attribute. Corresponds to the highest peaks in the
     * frequency spectrum of the label in the source attribute's space.
     */
    private final int maxPeaks;

    /** The fast fourier transformation calculator. */
    private final FastFourierTransform fft;

    /**
     * The spectrum filter type which should be applied on the spectrum after the fourier
     * transformation.
     */
    private final SpectrumFilter filter;

    /** The algorithm to find the peaks in the frequency spectrum. */
    private final PeakFinder peakFinder;

    /**
     * Creates a new sinus factory which uses at most <code>maxPeaks</code> peaks per attribute.
     * Uses the Blackman-Harris window function, no spectrum filter and a binary peak finder. The
     * defaults are the adaption type {@link #UNIFORMLY}, an epsilon of 0.1 and three attributes
     * per peak; they can be changed with the setters.
     *
     * @param maxPeaks
     *            the maximal number of peaks delivered by {@link #getAttributePeaks}
     */
    public SinusFactory(int maxPeaks) {
        this.maxPeaks = maxPeaks;
        this.fft = new FastFourierTransform(WindowFunction.BLACKMAN_HARRIS);
        this.filter = new SpectrumFilter(SpectrumFilter.NONE);
        this.peakFinder = new BinaryPeakFinder();
    }

    /**
     * Sets the type of frequency adaption. The value is not validated: any value other than
     * {@link #UNIFORMLY}, {@link #UNIFORMLY_WITHOUT_NU} or {@link #GAUSSIAN} leaves the peak
     * frequencies unadapted.
     */
    public void setAdaptionType(int type) {
        this.adaptionType = type;
    }

    /** Sets the width of the range around each peak frequency used for the adaption. */
    public void setEpsilon(double epsilon) {
        this.epsilon = epsilon;
    }

    /**
     * Sets the number of attributes generated per peak. The value is not validated. With a value
     * of 1 the uniform adaption types use the unadapted peak frequency; with a value below 1 no
     * attributes are generated at all. NOTE(review): the original documentation stated "Must be
     * bigger than 2!" - confirm the intended lower bound.
     */
    public void setAttributePerPeak(int attributesPerPeak) {
        this.attributesPerPeak = attributesPerPeak;
    }

    /**
     * Calculates the fourier transformation from the first attribute on the second and delivers
     * at most <code>maxPeaks</code> of the found peaks as attribute peaks of the second attribute.
     * The attribute statistics of the example set are recalculated as a side effect.
     *
     * The evidence of a peak is its magnitude relative to the average magnitude of the spectrum,
     * divided by the standard deviation of the second attribute normalized by its value range.
     * Only peaks whose evidence exceeds <code>MIN_EVIDENCE</code> times the evidence of the first
     * peak in sort order are delivered (presumably the sort order is highest peak first - this
     * depends on the natural ordering of {@link Peak}). If the evidence is not a number, e.g. for
     * an empty spectrum or an attribute with a value range of zero, the peak is skipped.
     *
     * @return a new list with the accepted attribute peaks, possibly empty
     */
    public List<AttributePeak> getAttributePeaks(ExampleSet exampleSet, Attribute first, Attribute second)
            throws OperatorException {
        exampleSet.recalculateAllAttributeStatistics();

        Complex[] transformed = fft.getFourierTransform(exampleSet, first, second);
        Peak[] spectrum = filter.filter(transformed, exampleSet.size());
        double average = averageMagnitude(spectrum);

        // keep only the first maxPeaks peaks in sort order
        List<Peak> peaks = peakFinder.getPeaks(spectrum);
        Collections.sort(peaks);
        if (maxPeaks < peaks.size()) {
            peaks = peaks.subList(0, maxPeaks);
        }

        // standard deviation of the input attribute normalized by its value range
        double inputDeviation = Math.sqrt(exampleSet.getStatistics(second, Statistics.VARIANCE))
                / (exampleSet.getStatistics(second, Statistics.MAXIMUM)
                        - exampleSet.getStatistics(second, Statistics.MINIMUM));

        // the first evidence which is a number becomes the reference for all following peaks
        double maxEvidence = Double.NaN;
        List<AttributePeak> attributes = new LinkedList<AttributePeak>();
        for (Peak peak : peaks) {
            double evidence = peak.getMagnitude() / average * (1.0d / inputDeviation);
            if (Double.isNaN(maxEvidence)) {
                maxEvidence = evidence;
            }
            // comparisons with NaN are false, hence undefined evidences are dropped here
            if (evidence > MIN_EVIDENCE * maxEvidence) {
                attributes.add(new AttributePeak(second, peak.getIndex(), evidence));
            }
        }
        return attributes;
    }

    /**
     * Generates new sinus function attributes for the given attribute peaks and adds them as
     * regular attributes to the example set. Since the frequency cannot be calculated exactly
     * (leakage, aliasing), <code>attributesPerPeak</code> attributes are added for each peak. The
     * frequencies of these attributes are chosen according to the adaption type (uniformly in the
     * epsilon range, uniformly without nu, gaussian with epsilon as standard deviation).
     *
     * The given list is sorted in place. Only peaks whose evidence exceeds
     * <code>MIN_EVIDENCE</code> times the evidence of the first peak after sorting are used.
     *
     * @param random
     *            the random generator, only used for the gaussian adaption type
     */
    public void generateSinusFunctions(ExampleSet exampleSet, List<AttributePeak> attributes, Random random)
            throws GenerationException {
        if (attributes.isEmpty()) {
            return;
        }
        Collections.sort(attributes);
        double minEvidence = MIN_EVIDENCE * attributes.get(0).getEvidence();
        for (AttributePeak ae : attributes) {
            if (ae.getEvidence() > minEvidence) {
                for (int i = 0; i < attributesPerPeak; i++) {
                    double frequency = adaptFrequency(ae.getFrequency(), i, random);
                    addSinusAttribute(exampleSet, ae, frequency);
                }
            }
        }
    }

    /** Returns the average magnitude of the given spectrum (not a number for an empty spectrum). */
    private static double averageMagnitude(Peak[] spectrum) {
        double sum = 0.0d;
        for (Peak peak : spectrum) {
            sum += peak.getMagnitude();
        }
        return sum / spectrum.length;
    }

    /**
     * Returns the frequency for the attribute with the given index (0 to attributesPerPeak - 1)
     * of a peak according to the current adaption type.
     */
    private double adaptFrequency(double frequency, int index, Random random) {
        switch (adaptionType) {
            case UNIFORMLY:
                if (attributesPerPeak != 1) {
                    // evenly spaced from frequency - epsilon * frequency to frequency + epsilon * frequency
                    return (double) index / (double) (attributesPerPeak - 1) * 2.0d * epsilon * frequency
                            + (frequency - epsilon * frequency);
                }
                return frequency;
            case UNIFORMLY_WITHOUT_NU:
                if (attributesPerPeak != 1) {
                    // evenly spaced from frequency - epsilon to frequency + epsilon
                    return (double) index / (double) (attributesPerPeak - 1) * 2.0d * epsilon
                            + (frequency - epsilon);
                }
                return frequency;
            case GAUSSIAN:
                return random.nextGaussian() * epsilon + frequency;
            default:
                // unknown adaption types leave the frequency unchanged
                return frequency;
        }
    }

    /**
     * Generates the attribute <code>sin(frequency * attribute)</code> for the attribute of the
     * given peak. Only the sinus attributes are added to the example set as regular attributes;
     * the constant and the product are merely generated as intermediate results.
     */
    private void addSinusAttribute(ExampleSet exampleSet, AttributePeak peak, double frequency)
            throws GenerationException {
        // frequency constant
        List<Attribute> frequencyResult = generateAttribute(exampleSet, new ConstantGenerator(frequency));

        // scaling with frequency
        FeatureGenerator scale = new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.PRODUCT);
        scale.setArguments(new Attribute[] { frequencyResult.get(0), peak.getAttribute() });
        List<Attribute> scaleResult = generateAttribute(exampleSet, scale);

        // calc sin
        FeatureGenerator sin = new TrigonometricFunctionGenerator(TrigonometricFunctionGenerator.SINUS);
        sin.setArguments(new Attribute[] { scaleResult.get(0) });
        for (Attribute attribute : generateAttribute(exampleSet, sin)) {
            exampleSet.getAttributes().addRegular(attribute);
        }
    }

    /** Applies the single given generator on the example table of the example set. */
    private List<Attribute> generateAttribute(ExampleSet exampleSet, FeatureGenerator generator)
            throws GenerationException {
        List<FeatureGenerator> generators = new LinkedList<FeatureGenerator>();
        generators.add(generator);
        return FeatureGenerator.generateAll(exampleSet.getExampleTable(), generators);
    }
}