/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* AttrEvalMetricLearner.java
* Copyright (C) 2002 Mikhail Bilenko
*
*/
package weka.core.metrics;
import java.util.*;
import java.io.Serializable;
import java.io.*;
import java.text.SimpleDateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import weka.classifiers.*;
import weka.classifiers.functions.*;
import weka.core.*;
import weka.attributeSelection.*;
/**
* AttrEvalMetricLearner - sets the weights of a metric
* using scores from an attribute evaluator
*
* @author Mikhail Bilenko (mbilenko@cs.utexas.edu)
* @version $Revision: 1.2 $
*/
public class AttrEvalMetricLearner extends MetricLearner implements Serializable, OptionHandler {

    /**
     * The metric that this learner was used to train, intended for
     * external-calculation based metrics.
     * NOTE(review): nothing in this class assigns this field; it stays null
     * unless a subclass sets it.
     */
    protected LearnableMetric m_metric = null;

    /**
     * The attribute evaluator used to score attributes. trainMetric() casts it
     * to AttributeEvaluator; setEvaluator() enforces that type on assignment.
     */
    protected ASEvaluation m_evaluator = new InfoGainAttributeEval();

    /**
     * Create a new attribute evaluator metric learner that uses the default
     * evaluator (InfoGainAttributeEval).
     */
    public AttrEvalMetricLearner() {
    }

    /**
     * Train a given metric using given training instances. Each attribute
     * index in [0, metric.getNumAttributes()) is scored with the evaluator,
     * and the normalized scores are installed as the metric's weights.
     * Progress and diagnostic information is written to System.out.
     *
     * If the data has no class attribute or fewer than two instances, the
     * metric is marked as untrained and the method returns without throwing.
     *
     * @param metric the metric to train
     * @param instances data to train the metric on
     * @exception Exception if the metric is external, or if building or
     * querying the evaluator fails.
     */
    public void trainMetric(LearnableMetric metric, Instances instances) throws Exception {
        // If the data doesn't have a class attribute (or is too small), bail
        if (instances.classIndex() < 0 || instances.numInstances() < 2) {
            metric.m_trained = false;
            System.out.println("Problem with training data");
            return;
        }
        if (metric.getExternal()) {
            throw new Exception("AttrEvalMetricLearner cannot be used as an external distance metric!");
        }

        System.out.println(getTimestamp() + " Starting to calculate weights" );
        m_evaluator.buildEvaluator(instances);
        AttributeEvaluator attributeEvaluator = (AttributeEvaluator) m_evaluator;

        double[] weights = new double[metric.getNumAttributes()];
        int posWeightsCount = 0;
        int negWeightsCount = 0;
        NumberFormat decFormat = new DecimalFormat("0.000E0#");
        for (int i = 0; i < weights.length; i++) {
            weights[i] = attributeEvaluator.evaluateAttribute(i);
            if (weights[i] > 0) {
                posWeightsCount++;
                // Only echo positive weights among the first 100 attributes, four per line
                if (i < 100) {
                    System.out.print(decFormat.format(weights[i]) + " " + instances.attribute(i).name() + "\t\t");
                    if (posWeightsCount % 4 == 0) System.out.println();
                }
            } else if (weights[i] < 0) {
                negWeightsCount++;
            }
        }
        metric.setWeights(normalizeWeights(weights));

        // Report the highest-scoring attributes (raw, unnormalized scores), best first
        System.out.println("Top components1:");
        int[] sortedIndeces = Utils.sort(weights);
        for (int i = sortedIndeces.length-1; i > sortedIndeces.length-300 && i >=0; i--) {
            int idx = sortedIndeces[i];
            System.out.println(i + ": " + idx + ":" + instances.attribute(idx) + "(" + weights[idx] + ")");
        }
        System.out.println(getTimestamp() + " Learned weights: " +
                           m_evaluator.getClass().getName() + " got " + posWeightsCount + " positive and " +
                           negWeightsCount + " negative weights out of " + weights.length);
        metric.m_trained = true;
    }

    /**
     * Normalize weights so that they sum to one. The input array is not
     * modified.
     *
     * If the weights sum to exactly zero (for example when every attribute
     * scored 0), dividing would produce NaN or infinite weights; in that case
     * an unscaled copy of the input is returned instead.
     *
     * @param weights an unnormalized array of weights
     * @return a new array holding the normalized weights
     */
    protected double[] normalizeWeights(double[] weights) {
        double sum = 0;
        for (int i = 0; i < weights.length; i++) {
            sum += weights[i];
        }
        double[] newWeights = new double[weights.length];
        if (sum == 0) {
            // Nothing sensible to divide by; hand back the weights unchanged
            System.arraycopy(weights, 0, newWeights, 0, weights.length);
            return newWeights;
        }
        for (int i = 0; i < weights.length; i++) {
            newWeights[i] = weights[i] / sum;
        }
        return newWeights;
    }

    /**
     * Not supported: this learner only sets metric weights and cannot act as
     * an external similarity function.
     *
     * @param instance1 first instance of a pair
     * @param instance2 second instance of a pair
     * @return never returns normally
     * @exception Exception always
     */
    public double getSimilarity(Instance instance1, Instance instance2) throws Exception{
        throw new Exception("AttrEvalMetricLearner cannot be used as an external distance metric!");
    }

    /**
     * Not supported: this learner only sets metric weights and cannot act as
     * an external distance function.
     *
     * @param instance1 first instance of a pair
     * @param instance2 second instance of a pair
     * @return never returns normally
     * @exception Exception always
     */
    public double getDistance(Instance instance1, Instance instance2) throws Exception{
        throw new Exception("AttrEvalMetricLearner cannot be used as an external distance metric!");
    }

    /**
     * Set the evaluator.
     *
     * @param evaluator the evaluator to use; must implement AttributeEvaluator
     * @exception Exception if the evaluator is not an AttributeEvaluator
     */
    public void setEvaluator(ASEvaluation evaluator) throws Exception {
        if (!(evaluator instanceof AttributeEvaluator)) {
            throw new Exception("Evaluator must be a child class of AttributeEvaluator!");
        }
        m_evaluator = evaluator;
    }

    /**
     * Get the evaluator.
     *
     * @return the attribute evaluator currently in use
     */
    public ASEvaluation getEvaluator() {
        return m_evaluator;
    }

    /**
     * Gets the current settings of this metric learner: "-E", the evaluator's
     * class name, then the evaluator's own options (if it is an
     * OptionHandler). The array is padded with empty strings up to a length
     * of 20, and grows beyond that when the evaluator reports more options
     * than would fit.
     *
     * @return an array of strings describing the current settings
     */
    public String [] getOptions() {
        final int minLength = 20;
        String[] evaluatorOptions = null;
        if (m_evaluator instanceof OptionHandler) {
            evaluatorOptions = ((OptionHandler)m_evaluator).getOptions();
        }
        if (evaluatorOptions == null) {
            evaluatorOptions = new String[0];
        }

        // Two slots for "-E <class>", plus the evaluator's options; never shorter
        // than the historical fixed size so existing consumers see the same padding.
        String[] options = new String[Math.max(minLength, 2 + evaluatorOptions.length)];
        int current = 0;
        options[current++] = "-E";
        options[current++] = m_evaluator.getClass().getName();
        System.arraycopy(evaluatorOptions, 0, options, current, evaluatorOptions.length);
        current += evaluatorOptions.length;
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Parses a given list of options. Currently a no-op: no options are
     * recognized and the array is ignored.
     * NOTE(review): getOptions() emits "-E <evaluator class>" but it is not
     * parsed here, so options do not round-trip; use setEvaluator() instead.
     *
     * @param options the list of options (ignored)
     * @exception Exception never thrown by this implementation
     */
    public void setOptions(String[] options) throws Exception {
    }

    /**
     * Gets a string containing the current time of day, formatted as
     * "HH:mm:ss:".
     *
     * @return a string containing the current time.
     */
    protected static String getTimestamp() {
        return (new SimpleDateFormat("HH:mm:ss:")).format(new Date());
    }

    /**
     * Returns an enumeration describing the available options. No options
     * are listed, so the enumeration is empty.
     *
     * @return an empty enumeration
     */
    public Enumeration listOptions() {
        Vector newVector = new Vector(0);
        return newVector.elements();
    }

    /**
     * Obtain a textual description of the metric learner: a label followed
     * by the quoted entries of getOptions().
     * NOTE(review): the label still reads "InfoGainMetricLearner" rather than
     * the class name; it is left unchanged in case callers depend on the
     * exact text.
     *
     * @return a textual description of the metric learner
     */
    public String toString() {
        return "InfoGainMetricLearner " + concatStringArray(getOptions());
    }

    /**
     * A little helper to create a single String from an array of Strings.
     * Each element is wrapped in double quotes and followed by a single
     * space (including the last one).
     *
     * @param strings an array of strings
     * @return a single concatenated string of quoted, space-terminated entries
     */
    public static String concatStringArray(String[] strings) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < strings.length; i++) {
            result.append("\"").append(strings[i]).append("\" ");
        }
        return result.toString();
    }
}