/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* PairwiseSelector.java
* Copyright (C) 2002 Mikhail Bilenko
*
*/
package weka.core.metrics;
import java.util.*;
import java.io.Serializable;
import java.util.ArrayList;
import weka.core.*;
/**
* Abstract PairwiseSelector class. Given a metric and training data,
* create a set of instance pairs that correspond to metric training data
*
* @author Mikhail Bilenko (mbilenko@cs.utexas.edu)
* @version $Revision: 1.2 $
*/
public abstract class PairwiseSelector {
/** A hashmap where class attribute values are mapped to lists of instances of that class */
protected HashMap m_classInstanceMap = null;
/** A list of classes, each element is the double value of the class attribute */
protected ArrayList m_classValueList = null;
/** The number of possible same-class pairs */
protected int m_numPotentialPositives = 0;
/** The number of possible different-class pairs */
protected int m_numPotentialNegatives = 0;
/**
* Provide an array of metric pairs metric using given training instances
*
* @param metric the metric to train
* @param instances data to train the metric on
* @exception Exception if training has gone bad.
*/
public abstract ArrayList createPairList(Instances instances, int numPosPairs, int numNegPairs, Metric metric) throws Exception;
/** Initialize m_classInstanceMap and m_classValueList using a given set of instances */
public void initSelector(Instances instances) {
HashMap sumMap = new HashMap();
m_classValueList = new ArrayList();
m_classInstanceMap = new HashMap();
m_numPotentialPositives = 0;
m_numPotentialNegatives = 0;
// go through all instances, hashing them into lists corresponding to each class
System.out.println("Got " + instances.numInstances());
Enumeration enum = instances.enumerateInstances();
int counter = 0;
while (enum.hasMoreElements()) {
Instance instance = (Instance) enum.nextElement();
if (instance.classIsMissing()) {
System.err.println("Instance has missing class!!!");
continue;
}
Double classValue = new Double(instance.classValue());
// check whether this class has been seen, and get its list of instances
ArrayList classInstanceList;
if (m_classInstanceMap.containsKey(classValue)) {
classInstanceList = (ArrayList) m_classInstanceMap.get(classValue);
} else { // create a new list of instances for a previously unseen class
classInstanceList = new ArrayList();
m_classInstanceMap.put(classValue, classInstanceList);
m_classValueList.add(classValue);
}
// check that the instance is not a dupe of previously seen instances
double valueSum = 0;
for (int i = 0; i < instance.numValues(); i++) {
valueSum += instance.value(i);
}
// prevent duplicate training instances
if (sumMap.containsKey(new Double(valueSum))) {
ArrayList sumList = (ArrayList) sumMap.get(new Double(valueSum));
boolean unique = true;
for (int i = 0; i < sumList.size() && unique; i++) {
Instance nextInstance = (Instance) sumList.get(i);
unique = false;
for (int j = 0; j < nextInstance.numValues() && !unique; j++) {
if (nextInstance.value(j) != instance.value(j)) {
unique = true;
}
}
if (!unique) {
// This is a dupe!
break;
}
}
if (unique) {
classInstanceList.add(instance);
}
} else {
classInstanceList.add(instance);
ArrayList sumList = new ArrayList();
sumList.add(instance);
sumMap.put(new Double(valueSum), sumList);
}
}
// get the number of potential positive pairs
Iterator iterator = m_classInstanceMap.values().iterator();
while (iterator.hasNext()) {
ArrayList classInstanceList = (ArrayList) iterator.next();
m_numPotentialPositives += classInstanceList.size() * (classInstanceList.size() - 1) / 2;
System.out.println(classInstanceList.size() + "\t" + m_numPotentialPositives);
}
int numInstances = instances.numInstances();
m_numPotentialNegatives = numInstances * (numInstances - 1) / 2 - m_numPotentialPositives;
}
/**
* Creates a new instance of a metric learner given it's class name and
* (optional) arguments to pass to it's setOptions method. If the
* classifier implements OptionHandler and the options parameter is
* non-null, the classifier will have it's options set.
*
* @param metricLearnerName the fully qualified class name of the metric learner
* @param options an array of options suitable for passing to setOptions. May
* be null.
* @return the newly created metric learner, ready for use.
* @exception Exception if the metric learner name is invalid, or the options
* supplied are not acceptable to the metric learner
*/
public static PairwiseSelector forName(String pairwiseSelectorName,
String [] options) throws Exception {
System.out.println("Instantiating a pairwise selector: " + pairwiseSelectorName +
" with options: " + weka.classifiers.sparse.IBkMetric.concatStringArray(options));
PairwiseSelector p = (PairwiseSelector)Utils.forName(PairwiseSelector.class,
pairwiseSelectorName,
options);
System.out.println("success");
return p;
}
}