/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* InterquartileRange.java
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
*/
package weka.filters.unsupervised.attribute;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.SimpleBatchFilter;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* A filter for detecting outliers and extreme values based on interquartile ranges. The filter skips the class attribute.<br/>
* <br/>
* Outliers:<br/>
* Q3 + OF*IQR < x <= Q3 + EVF*IQR<br/>
* or<br/>
* Q1 - EVF*IQR <= x < Q1 - OF*IQR<br/>
* <br/>
* Extreme values:<br/>
* x > Q3 + EVF*IQR<br/>
* or<br/>
* x < Q1 - EVF*IQR<br/>
* <br/>
* Key:<br/>
* Q1 = 25% quartile<br/>
* Q3 = 75% quartile<br/>
* IQR = Interquartile Range, difference between Q1 and Q3<br/>
* OF = Outlier Factor<br/>
* EVF = Extreme Value Factor
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* Turns on output of debugging information.</pre>
*
* <pre> -R <col1,col2-col4,...>
* Specifies list of columns to base outlier/extreme value detection
* on. If an instance is considered in at least one of those
* attributes an outlier/extreme value, it is tagged accordingly.
* 'first' and 'last' are valid indexes.
* (default none)</pre>
*
* <pre> -O <num>
* The factor for outlier detection.
* (default: 3)</pre>
*
* <pre> -E <num>
* The factor for extreme values detection.
* (default: 2*Outlier Factor)</pre>
*
* <pre> -E-as-O
* Tags extreme values also as outliers.
* (default: off)</pre>
*
* <pre> -P
* Generates Outlier/ExtremeValue pair for each numeric attribute in
* the range, not just a single indicator pair for all the attributes.
* (default: off)</pre>
*
* <pre> -M
* Generates an additional attribute 'Offset' per Outlier/ExtremeValue
* pair that contains the multiplier that the value is off the median.
* value = median + 'multiplier' * IQR
* Note: implicitely sets '-P'. (default: off)</pre>
*
<!-- options-end -->
*
* Thanks to Dale for a few brainstorming sessions.
*
* @author Dale Fletcher (dale at cs dot waikato dot ac dot nz)
* @author fracpete (fracpete at waikato dot ac dot nz)
* @version $Revision: 5987 $
*/
public class InterquartileRange
  extends SimpleBatchFilter {

  /** for serialization */
  private static final long serialVersionUID = -227879653639723030L;

  /**
   * marker stored in {@link #m_AttributeIndices} for selected attributes
   * that are skipped (the class attribute and non-numeric attributes)
   */
  public final static int NON_NUMERIC = -1;

  /** the attribute range to work on */
  protected Range m_Attributes = new Range("first-last");

  /**
   * the resolved attribute indices of the range (cached for performance);
   * skipped attributes are replaced by {@link #NON_NUMERIC}. All per-attribute
   * arrays below are indexed by the position in this array, NOT by the
   * attribute index in the dataset.
   */
  protected int[] m_AttributeIndices = null;

  /** the factor for detecting outliers */
  protected double m_OutlierFactor = 3;

  /** the factor for detecting extreme values, by default 2*m_OutlierFactor */
  protected double m_ExtremeValuesFactor = 2*m_OutlierFactor;

  /** whether extreme values are also tagged as outliers */
  protected boolean m_ExtremeValuesAsOutliers = false;

  /** the upper extreme value threshold (= Q3 + EVF*IQR) */
  protected double[] m_UpperExtremeValue = null;

  /** the upper outlier threshold (= Q3 + OF*IQR) */
  protected double[] m_UpperOutlier = null;

  /** the lower outlier threshold (= Q1 - OF*IQR) */
  protected double[] m_LowerOutlier = null;

  /** the interquartile range */
  protected double[] m_IQR = null;

  /** the median */
  protected double[] m_Median = null;

  /** the lower extreme value threshold (= Q1 - EVF*IQR) */
  protected double[] m_LowerExtremeValue = null;

  /** whether to generate Outlier/ExtremeValue attributes for each attribute
   * instead of a general one */
  protected boolean m_DetectionPerAttribute = false;

  /**
   * the position of the outlier attribute in the output format; holds a
   * single entry in the "one pair for all attributes" mode, otherwise one
   * entry per position in {@link #m_AttributeIndices}
   */
  protected int[] m_OutlierAttributePosition = null;

  /** whether to add another attribute called "Offset", that lists the
   * 'multiplier' by which the outlier/extreme value is away from the median,
   * i.e., value = median + 'multiplier' * IQR <br/>
   * automatically enables m_DetectionPerAttribute!
   */
  protected boolean m_OutputOffsetMultiplier = false;

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "A filter for detecting outliers and extreme values based on "
      + "interquartile ranges. The filter skips the class attribute.\n\n"
      + "Outliers:\n"
      + " Q3 + OF*IQR < x <= Q3 + EVF*IQR\n"
      + " or\n"
      + " Q1 - EVF*IQR <= x < Q1 - OF*IQR\n"
      + "\n"
      + "Extreme values:\n"
      + " x > Q3 + EVF*IQR\n"
      + " or\n"
      + " x < Q1 - EVF*IQR\n"
      + "\n"
      + "Key:\n"
      + " Q1 = 25% quartile\n"
      + " Q3 = 75% quartile\n"
      + " IQR = Interquartile Range, difference between Q1 and Q3\n"
      + " OF = Outlier Factor\n"
      + " EVF = Extreme Value Factor";
  }

  /**
   * Returns an enumeration describing the available options: the options
   * of the superclass followed by the ones specific to this filter.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    Enumeration enm = super.listOptions();
    while (enm.hasMoreElements())
      result.add(enm.nextElement());

    result.addElement(new Option(
        "\tSpecifies list of columns to base outlier/extreme value detection\n"
        + "\ton. If an instance is considered in at least one of those\n"
        + "\tattributes an outlier/extreme value, it is tagged accordingly.\n"
        + " 'first' and 'last' are valid indexes.\n"
        + "\t(default none)",
        "R", 1, "-R <col1,col2-col4,...>"));

    result.addElement(new Option(
        "\tThe factor for outlier detection.\n"
        + "\t(default: 3)",
        "O", 1, "-O <num>"));

    result.addElement(new Option(
        "\tThe factor for extreme values detection.\n"
        + "\t(default: 2*Outlier Factor)",
        "E", 1, "-E <num>"));

    result.addElement(new Option(
        "\tTags extreme values also as outliers.\n"
        + "\t(default: off)",
        "E-as-O", 0, "-E-as-O"));

    result.addElement(new Option(
        "\tGenerates Outlier/ExtremeValue pair for each numeric attribute in\n"
        + "\tthe range, not just a single indicator pair for all the attributes.\n"
        + "\t(default: off)",
        "P", 0, "-P"));

    result.addElement(new Option(
        "\tGenerates an additional attribute 'Offset' per Outlier/ExtremeValue\n"
        + "\tpair that contains the multiplier that the value is off the median.\n"
        + "\t value = median + 'multiplier' * IQR\n"
        + "Note: implicitely sets '-P'."
        + "\t(default: off)",
        "M", 0, "-M"));

    return result.elements();
  }

  /**
   * Parses a list of options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   * Turns on output of debugging information.</pre>
   *
   * <pre> -R <col1,col2-col4,...>
   * Specifies list of columns to base outlier/extreme value detection
   * on. If an instance is considered in at least one of those
   * attributes an outlier/extreme value, it is tagged accordingly.
   * 'first' and 'last' are valid indexes.
   * (default none)</pre>
   *
   * <pre> -O <num>
   * The factor for outlier detection.
   * (default: 3)</pre>
   *
   * <pre> -E <num>
   * The factor for extreme values detection.
   * (default: 2*Outlier Factor)</pre>
   *
   * <pre> -E-as-O
   * Tags extreme values also as outliers.
   * (default: off)</pre>
   *
   * <pre> -P
   * Generates Outlier/ExtremeValue pair for each numeric attribute in
   * the range, not just a single indicator pair for all the attributes.
   * (default: off)</pre>
   *
   * <pre> -M
   * Generates an additional attribute 'Offset' per Outlier/ExtremeValue
   * pair that contains the multiplier that the value is off the median.
   * value = median + 'multiplier' * IQR
   * Note: implicitely sets '-P'. (default: off)</pre>
   *
   <!-- options-end -->
   *
   * The outlier and the extreme values factor are read first and then
   * applied as a pair, so that a valid new combination is accepted
   * regardless of the factors that were set before the call.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String tmpStr;
    double outlierFactor;
    double extremeValuesFactor;

    super.setOptions(options);

    tmpStr = Utils.getOption("R", options);
    if (tmpStr.length() != 0)
      setAttributeIndices(tmpStr);
    else
      setAttributeIndices("first-last");

    outlierFactor = 3;
    tmpStr = Utils.getOption("O", options);
    if (tmpStr.length() != 0)
      outlierFactor = Double.parseDouble(tmpStr);

    extremeValuesFactor = 2*outlierFactor;
    tmpStr = Utils.getOption("E", options);
    if (tmpStr.length() != 0)
      extremeValuesFactor = Double.parseDouble(tmpStr);

    // Each setter validates its argument against the *current* value of the
    // other factor. Applying them in a fixed order would therefore reject
    // valid pairs such as "-O 7 -E 14" when the old factors are 3 and 6.
    // Pick the order in which the intermediate state stays valid; an invalid
    // pair is still reported by the setters and leaves a consistent state.
    if (extremeValuesFactor > getOutlierFactor()) {
      setExtremeValuesFactor(extremeValuesFactor);
      setOutlierFactor(outlierFactor);
    }
    else {
      setOutlierFactor(outlierFactor);
      setExtremeValuesFactor(extremeValuesFactor);
    }

    setExtremeValuesAsOutliers(Utils.getFlag("E-as-O", options));

    // order matters: -M switches -P on again, see setOutputOffsetMultiplier
    setDetectionPerAttribute(Utils.getFlag("P", options));
    setOutputOffsetMultiplier(Utils.getFlag("M", options));
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector result;
    String[] options;
    int i;

    result = new Vector();

    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    result.add("-R");
    if (!getAttributeIndices().equals(""))
      result.add(getAttributeIndices());
    else
      result.add("first-last");

    result.add("-O");
    result.add("" + getOutlierFactor());

    result.add("-E");
    result.add("" + getExtremeValuesFactor());

    if (getExtremeValuesAsOutliers())
      result.add("-E-as-O");

    if (getDetectionPerAttribute())
      result.add("-P");

    if (getOutputOffsetMultiplier())
      result.add("-M");

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String attributeIndicesTipText() {
    return
        "Specify range of attributes to act on; "
      + " this is a comma separated list of attribute indices, with"
      + " \"first\" and \"last\" valid values; specify an inclusive"
      + " range with \"-\", eg: \"first-3,5,6-10,last\".";
  }

  /**
   * Gets the current range selection
   *
   * @return a string containing a comma separated list of ranges
   */
  public String getAttributeIndices() {
    return m_Attributes.getRanges();
  }

  /**
   * Sets which attributes are to be used for interquartile calculations and
   * outlier/extreme value detection (only numeric attributes among the
   * selection will be used).
   *
   * @param value a string representing the list of attributes. Since
   * the string will typically come from a user, attributes
   * are indexed from 1. <br> eg: first-3,5,6-last
   * @throws IllegalArgumentException if an invalid range list is supplied
   */
  public void setAttributeIndices(String value) {
    m_Attributes.setRanges(value);
  }

  /**
   * Sets which attributes are to be used for interquartile calculations and
   * outlier/extreme value detection (only numeric attributes among the
   * selection will be used).
   *
   * @param value an array containing indexes of attributes to work on.
   * Since the array will typically come from a program,
   * attributes are indexed from 0.
   * @throws IllegalArgumentException if an invalid set of ranges is supplied
   */
  public void setAttributeIndicesArray(int[] value) {
    setAttributeIndices(Range.indicesToRangeList(value));
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String outlierFactorTipText() {
    return "The factor for determining the thresholds for outliers.";
  }

  /**
   * Sets the factor for determining the thresholds for outliers. The value
   * must be smaller than the current extreme values factor; otherwise a
   * message is printed to stderr and the old factor is kept.
   *
   * @param value the factor.
   */
  public void setOutlierFactor(double value) {
    if (value >= getExtremeValuesFactor())
      System.err.println("OutlierFactor must be smaller than ExtremeValueFactor");
    else
      m_OutlierFactor = value;
  }

  /**
   * Gets the factor for determining the thresholds for outliers.
   *
   * @return the factor.
   */
  public double getOutlierFactor() {
    return m_OutlierFactor;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String extremeValuesFactorTipText() {
    return "The factor for determining the thresholds for extreme values.";
  }

  /**
   * Sets the factor for determining the thresholds for extreme values. The
   * value must be greater than the current outlier factor; otherwise a
   * message is printed to stderr and the old factor is kept.
   *
   * @param value the factor.
   */
  public void setExtremeValuesFactor(double value) {
    if (value <= getOutlierFactor())
      System.err.println("ExtremeValuesFactor must be greater than OutlierFactor!");
    else
      m_ExtremeValuesFactor = value;
  }

  /**
   * Gets the factor for determining the thresholds for extreme values.
   *
   * @return the factor.
   */
  public double getExtremeValuesFactor() {
    return m_ExtremeValuesFactor;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String extremeValuesAsOutliersTipText() {
    return "Whether to tag extreme values also as outliers.";
  }

  /**
   * Set whether extreme values are also tagged as outliers.
   *
   * @param value whether or not to tag extreme values also as outliers.
   */
  public void setExtremeValuesAsOutliers(boolean value) {
    m_ExtremeValuesAsOutliers = value;
  }

  /**
   * Get whether extreme values are also tagged as outliers.
   *
   * @return true if extreme values are also tagged as outliers.
   */
  public boolean getExtremeValuesAsOutliers() {
    return m_ExtremeValuesAsOutliers;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String detectionPerAttributeTipText() {
    return
        "Generates Outlier/ExtremeValue attribute pair for each numeric "
      + "attribute, not just a single pair for all numeric attributes together.";
  }

  /**
   * Set whether an Outlier/ExtremeValue attribute pair is generated for
   * each numeric attribute ("true") or just one pair for all numeric
   * attributes together ("false"). Turning this off also turns off the
   * output of the offset multiplier, which requires per-attribute detection.
   *
   * @param value whether or not to generate indicator attribute pairs
   * for each numeric attribute.
   */
  public void setDetectionPerAttribute(boolean value) {
    m_DetectionPerAttribute = value;
    if (!m_DetectionPerAttribute)
      m_OutputOffsetMultiplier = false;
  }

  /**
   * Gets whether an Outlier/ExtremeValue attribute pair is generated for
   * each numeric attribute ("true") or just one pair for all numeric
   * attributes together ("false").
   *
   * @return true if indicator attribute pairs are generated for
   * each numeric attribute.
   */
  public boolean getDetectionPerAttribute() {
    return m_DetectionPerAttribute;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String outputOffsetMultiplierTipText() {
    return
        "Generates an additional attribute 'Offset' that contains the "
      + "multiplier the value is off the median: "
      + "value = median + 'multiplier' * IQR";
  }

  /**
   * Set whether an additional attribute "Offset" is generated per
   * Outlier/ExtremeValue attribute pair that lists the multiplier the value
   * is off the median: value = median + 'multiplier' * IQR. Turning this on
   * also turns on per-attribute detection.
   *
   * @param value whether or not to generate the additional attribute.
   */
  public void setOutputOffsetMultiplier(boolean value) {
    m_OutputOffsetMultiplier = value;
    if (m_OutputOffsetMultiplier)
      m_DetectionPerAttribute = true;
  }

  /**
   * Gets whether an additional attribute "Offset" is generated per
   * Outlier/ExtremeValue attribute pair that lists the multiplier the value
   * is off the median: value = median + 'multiplier' * IQR.
   *
   * @return true if the additional attribute is generated.
   */
  public boolean getOutputOffsetMultiplier() {
    return m_OutputOffsetMultiplier;
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return the capabilities of this object
   * @see Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Creates a nominal indicator attribute with the labels "no" (index 0)
   * and "yes" (index 1).
   *
   * @param name the name of the new attribute
   * @return the new attribute
   */
  private static Attribute createIndicatorAttribute(String name) {
    FastVector labels = new FastVector();
    labels.addElement("no");
    labels.addElement("yes");
    return new Attribute(name, labels);
  }

  /**
   * Determines the output format based on the input format and returns
   * this. The output consists of all input attributes followed by either a
   * single "Outlier"/"ExtremeValue" pair or, in per-attribute mode, by one
   * "&lt;name&gt;_Outlier"/"&lt;name&gt;_ExtremeValue" pair (plus an
   * optional numeric "&lt;name&gt;_Offset") per selected numeric attribute.
   * As a side effect the selected attribute indices and the positions of
   * the generated attributes are cached.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat)
    throws Exception {

    FastVector atts;
    Instances result;
    String name;
    int i;

    // resolve the range against the actual number of attributes
    m_Attributes.setUpper(inputFormat.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();

    // the class attribute and non-numeric attributes are ignored
    for (i = 0; i < m_AttributeIndices.length; i++) {
      if (   (m_AttributeIndices[i] == inputFormat.classIndex())
          || !inputFormat.attribute(m_AttributeIndices[i]).isNumeric())
        m_AttributeIndices[i] = NON_NUMERIC;
    }

    // get old attributes
    atts = new FastVector();
    for (i = 0; i < inputFormat.numAttributes(); i++)
      atts.addElement(inputFormat.attribute(i));

    if (!getDetectionPerAttribute()) {
      // a single indicator pair for all attributes
      m_OutlierAttributePosition = new int[1];
      m_OutlierAttributePosition[0] = atts.size();

      atts.addElement(createIndicatorAttribute("Outlier"));
      atts.addElement(createIndicatorAttribute("ExtremeValue"));
    }
    else {
      // one indicator pair (and optionally an offset) per numeric attribute
      m_OutlierAttributePosition = new int[m_AttributeIndices.length];
      for (i = 0; i < m_AttributeIndices.length; i++) {
        if (m_AttributeIndices[i] == NON_NUMERIC)
          continue;

        m_OutlierAttributePosition[i] = atts.size();
        name = inputFormat.attribute(m_AttributeIndices[i]).name();

        atts.addElement(createIndicatorAttribute(name + "_Outlier"));
        atts.addElement(createIndicatorAttribute(name + "_ExtremeValue"));
        if (getOutputOffsetMultiplier())
          atts.addElement(new Attribute(name + "_Offset"));
      }
    }

    // generate header
    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }

  /**
   * Returns the given values without the NaN entries (presumably the
   * missing values of the attribute), since those cannot take part in
   * the quartile computation.
   *
   * @param values the values to filter
   * @return the same array if it contains no NaN, otherwise a new,
   * shorter array with the remaining values in their original order
   */
  private static double[] removeNaN(double[] values) {
    double[] result;
    int count;
    int next;
    int i;

    count = 0;
    for (i = 0; i < values.length; i++) {
      if (!Double.isNaN(values[i]))
        count++;
    }
    if (count == values.length)
      return values;

    result = new double[count];
    next = 0;
    for (i = 0; i < values.length; i++) {
      if (!Double.isNaN(values[i]))
        result[next++] = values[i];
    }

    return result;
  }

  /**
   * Returns the median of a non-empty slice of the sorted data.
   *
   * @param values the unsorted values
   * @param order the indices into values in ascending order of the values
   * @param from the first position in order (inclusive)
   * @param to the last position in order (exclusive), must be &gt; from
   * @return the middle element for an odd-sized slice, otherwise the mean
   * of the two middle elements
   */
  private static double medianOfSortedRange(double[] values, int[] order, int from, int to) {
    int size = to - from;
    int mid = from + size / 2;

    if (size % 2 == 1)
      return values[order[mid]];
    else
      return (values[order[mid - 1]] + values[order[mid]]) / 2;
  }

  /**
   * Computes the median, the interquartile range and the thresholds for
   * outliers and extreme values of each selected numeric attribute.
   * Q1 and Q3 are the medians of the lower and the upper half of the sorted
   * values (the middle element is left out of both halves if the number of
   * values is odd). NaN values are ignored. If an attribute has no usable
   * value at all, its statistics are set to NaN, which results in no
   * instance being flagged for that attribute.
   *
   * @param instances the data to work on
   */
  protected void computeThresholds(Instances instances) {
    int i;
    double[] values;
    int[] sortedIndices;
    int count;
    int half;
    double q1;
    double q2;
    double q3;

    m_UpperExtremeValue = new double[m_AttributeIndices.length];
    m_UpperOutlier = new double[m_AttributeIndices.length];
    m_LowerOutlier = new double[m_AttributeIndices.length];
    m_LowerExtremeValue = new double[m_AttributeIndices.length];
    m_Median = new double[m_AttributeIndices.length];
    m_IQR = new double[m_AttributeIndices.length];

    for (i = 0; i < m_AttributeIndices.length; i++) {
      // non-numeric attribute?
      if (m_AttributeIndices[i] == NON_NUMERIC)
        continue;

      values = removeNaN(instances.attributeToDoubleArray(m_AttributeIndices[i]));
      count = values.length;

      if (count == 0) {
        // nothing to base the statistics on
        q1 = Double.NaN;
        q2 = Double.NaN;
        q3 = Double.NaN;
      }
      else {
        sortedIndices = Utils.sort(values);
        half = count / 2;

        q2 = medianOfSortedRange(values, sortedIndices, 0, count);
        if (half == 0) {
          // a single value: both halves are empty
          q1 = q2;
          q3 = q2;
        }
        else {
          q1 = medianOfSortedRange(values, sortedIndices, 0, half);
          q3 = medianOfSortedRange(values, sortedIndices, count - half, count);
        }
      }

      // determine thresholds and other values
      m_Median[i] = q2;
      m_IQR[i] = q3 - q1;
      m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
      m_UpperOutlier[i] = q3 + getOutlierFactor() * m_IQR[i];
      m_LowerOutlier[i] = q1 - getOutlierFactor() * m_IQR[i];
      m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
    }
  }

  /**
   * returns whether the instance has an outlier in the specified attribute
   * or not
   *
   * @param inst the instance to test
   * @param index the position of the attribute in m_AttributeIndices
   * (not the attribute index in the dataset)
   * @return true if the instance is an outlier
   */
  protected boolean isOutlier(Instance inst, int index) {
    boolean result;
    double value;

    value = inst.value(m_AttributeIndices[index]);
    result = ((m_UpperOutlier[index] < value) && (value <= m_UpperExtremeValue[index]))
          || ((m_LowerExtremeValue[index] <= value) && (value < m_LowerOutlier[index]));

    return result;
  }

  /**
   * returns whether the instance is an outlier in at least one of the
   * selected numeric attributes
   *
   * @param inst the instance to test
   * @return true if the instance is an outlier
   */
  protected boolean isOutlier(Instance inst) {
    boolean result;
    int i;

    result = false;

    for (i = 0; i < m_AttributeIndices.length; i++) {
      // non-numeric attribute?
      if (m_AttributeIndices[i] == NON_NUMERIC)
        continue;

      result = isOutlier(inst, i);

      if (result)
        break;
    }

    return result;
  }

  /**
   * returns whether the instance has an extreme value in the specified
   * attribute or not
   *
   * @param inst the instance to test
   * @param index the position of the attribute in m_AttributeIndices
   * (not the attribute index in the dataset)
   * @return true if the instance is an extreme value
   */
  protected boolean isExtremeValue(Instance inst, int index) {
    boolean result;
    double value;

    value = inst.value(m_AttributeIndices[index]);
    result = (value > m_UpperExtremeValue[index])
          || (value < m_LowerExtremeValue[index]);

    return result;
  }

  /**
   * returns whether the instance has an extreme value in at least one of
   * the selected numeric attributes
   *
   * @param inst the instance to test
   * @return true if the instance is an extreme value
   */
  protected boolean isExtremeValue(Instance inst) {
    boolean result;
    int i;

    result = false;

    for (i = 0; i < m_AttributeIndices.length; i++) {
      // non-numeric attribute?
      if (m_AttributeIndices[i] == NON_NUMERIC)
        continue;

      result = isExtremeValue(inst, i);

      if (result)
        break;
    }

    return result;
  }

  /**
   * returns the multiplier of the IQR the instance is off the median for this
   * particular attribute. Note that the result is not finite if the IQR of
   * the attribute is 0.
   *
   * @param inst the instance to test
   * @param index the position of the attribute in m_AttributeIndices
   * (not the attribute index in the dataset)
   * @return the multiplier
   */
  protected double calculateMultiplier(Instance inst, int index) {
    double result;
    double value;

    value = inst.value(m_AttributeIndices[index]);
    result = (value - m_Median[index]) / m_IQR[index];

    return result;
  }

  /**
   * Processes the given data and returns the modified version. For the
   * first batch the thresholds are computed from the data beforehand. Each
   * instance is copied into the output format and its indicator attributes
   * (and, if enabled, the offset attributes) are filled in; indicator
   * values default to "no".
   *
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    Instances result;
    Instance instOld;
    Instance instNew;
    int i;
    int n;
    double[] values;
    int numAttNew;
    int numAttOld;

    if (!isFirstBatchDone())
      computeThresholds(instances);

    result = getOutputFormat();
    numAttOld = instances.numAttributes();
    numAttNew = result.numAttributes();

    for (n = 0; n < instances.numInstances(); n++) {
      instOld = instances.instance(n);

      // the additional values stay 0, i.e., "no" for the indicators
      values = new double[numAttNew];
      System.arraycopy(instOld.toDoubleArray(), 0, values, 0, numAttOld);

      // generate new instance
      instNew = new DenseInstance(1.0, values);
      instNew.setDataset(result);

      // per attribute?
      if (!getDetectionPerAttribute()) {
        // outlier?
        if (isOutlier(instOld))
          instNew.setValue(m_OutlierAttributePosition[0], 1);
        // extreme value?
        if (isExtremeValue(instOld)) {
          instNew.setValue(m_OutlierAttributePosition[0] + 1, 1);
          // tag extreme values also as outliers?
          if (getExtremeValuesAsOutliers())
            instNew.setValue(m_OutlierAttributePosition[0], 1);
        }
      }
      else {
        for (i = 0; i < m_AttributeIndices.length; i++) {
          // non-numeric attribute?
          if (m_AttributeIndices[i] == NON_NUMERIC)
            continue;

          // NOTE: the per-attribute helpers expect the position "i" within
          // m_AttributeIndices, not the dataset index m_AttributeIndices[i];
          // the two only coincide for the range "first-last"

          // outlier?
          if (isOutlier(instOld, i))
            instNew.setValue(m_OutlierAttributePosition[i], 1);
          // extreme value?
          if (isExtremeValue(instOld, i)) {
            instNew.setValue(m_OutlierAttributePosition[i] + 1, 1);
            // tag extreme values also as outliers?
            if (getExtremeValuesAsOutliers())
              instNew.setValue(m_OutlierAttributePosition[i], 1);
          }
          // add multiplier?
          if (getOutputOffsetMultiplier())
            instNew.setValue(
                m_OutlierAttributePosition[i] + 2,
                calculateMultiplier(instOld, i));
        }
      }

      // copy possible strings, relational values...
      copyValues(instNew, false, instOld.dataset(), getOutputFormat());

      // add to output
      result.add(instNew);
    }

    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param args should contain arguments to the filter: use -h for help
   */
  public static void main(String[] args) {
    runFilter(new InterquartileRange(), args);
  }
}