/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * KL.java
 * Copyright (C) 2003 Mikhail Bilenko
 *
 */

package weka.core.metrics;

import weka.core.*;
import weka.deduping.metrics.HashMapVector;

import java.util.*;
import java.io.*;

/**
 * KL class
 *
 * Implements weighted Kullback-Leibler divergence
 *
 * @author Mikhail Bilenko (mbilenko@cs.utexas.edu)
 * @version $Revision: 1.23 $
 */
public class KL extends SmoothingMetric implements InstanceConverter, OptionHandler {

  /** ln(2); used throughout to convert natural-log terms to base-2 (bits). */
  public final double LOG2 = Math.log(2);

  /** We can switch between regular KL divergence and I-divergence */
  protected boolean m_useIDivergence = true;

  /** Frequencies over the entire dataset used for smoothing */
  protected HashMapVector m_datasetFrequencies = null;

  /** We hash sum(p log(p)) terms for the input instances to speed up computation */
  protected HashMap m_instanceNormHash = null;

  /** Total number of tokens in the dataset */
  protected int m_numTotalTokens = 0;

  /** Different smoothing methods for obtaining probability distributions from frequencies.
   *  Values are distinct bits (1, 2, 4) so they can double as flags. */
  public static final int SMOOTHING_UNSMOOTHED = 1;
  public static final int SMOOTHING_DIRICHLET = 2;
  public static final int SMOOTHING_JELINEK_MERCER = 4;
  public static final Tag[] TAGS_SMOOTHING = {
    new Tag(SMOOTHING_UNSMOOTHED, "unsmoothed"),
    new Tag(SMOOTHING_DIRICHLET, "Dirichlet"),
    new Tag(SMOOTHING_JELINEK_MERCER, "Jelinek-Mercer")
  };

  /** The smoothing method */
  protected int m_smoothingType = SMOOTHING_UNSMOOTHED;

  /** The pseudocount value for the Dirichlet smoothing */
  protected double m_pseudoCountDirichlet = 1.0;

  /** The lambda value for the Jelinek-Mercer smoothing */
  protected double m_lambdaJM = 0.5;

  /** We can have different ways of converting from distance to similarity */
  public static final int CONVERSION_LAPLACIAN = 1;
  public static final int CONVERSION_UNIT = 2;
  public static final int CONVERSION_EXPONENTIAL = 4;
  public static final Tag[] TAGS_CONVERSION = {
    new Tag(CONVERSION_UNIT, "similarity = 1-distance"),
    new Tag(CONVERSION_LAPLACIAN, "similarity=1/(1+distance)"),
    new Tag(CONVERSION_EXPONENTIAL, "similarity=exp(-distance)")
  };

  /** The method of converting, by default laplacian */
  protected int m_conversionType = CONVERSION_LAPLACIAN;

  /** A metric learner responsible for training the parameters of the metric */
  protected MetricLearner m_metricLearner = new ClassifierMetricLearner();
  //  protected MetricLearner m_metricLearner = new GDMetricLearner();

  /** A hashmap that maps every instance to a set of instances with which JS has been computed;
   *  acts as a cache of per-pair JS "diff vectors" (see distanceJSSparse). */
  protected HashMap m_instanceConstraintMap = new HashMap();

  /**
   * Create a new metric.
   * @param numAttributes the number of attributes that the metric will work on
   */
  public KL(int numAttributes) throws Exception {
    super();
    buildMetric(numAttributes);
  }

  /** Create a default new metric */
  public KL() {
    // NOTE(review): m_fixedMaxDistance/m_maxDistance are inherited fields;
    // presumably they cap the distance at 1 for normalized JS — confirm in SmoothingMetric.
    m_fixedMaxDistance = true;
    m_maxDistance = 1;
  }

  /**
   * Creates a new metric which takes specified attributes.
   *
   * @param _attrIdxs An array containing attribute indeces that will
   * be used in the metric
   */
  public KL(int[] _attrIdxs) throws Exception {
    super();
    setAttrIdxs(_attrIdxs);
    buildMetric(_attrIdxs.length);
  }

  /**
   * Reset all values that have been learned
   */
  public void resetMetric() throws Exception {
    super.resetMetric();
    m_currAlpha = m_alpha;
    // drop the cached per-pair JS diff vectors
    m_instanceConstraintMap = new HashMap();
  }

  /**
   * Generates a new Metric.
Has to initialize all fields of the metric
 * with default values.
 *
 * @param numAttributes the number of attributes that the metric will work on
 * @exception Exception if the distance metric has not been
 * generated successfully.
 */
public void buildMetric(int numAttributes) throws Exception {
  m_numAttributes = numAttributes;
  // all attributes start with uniform weight 1 and identity index mapping
  m_attrWeights = new double[numAttributes];
  m_attrIdxs = new int[numAttributes];
  for (int i = 0; i < numAttributes; i++) {
    m_attrWeights[i] = 1;
    m_attrIdxs[i] = i;
  }
  m_instanceConstraintMap = new HashMap();
  m_currAlpha = m_alpha;
}

/**
 * Generates a new Metric. Has to initialize all fields of the metric
 * with default values
 *
 * @param options an array of options suitable for passing to setOptions.
 * May be null.
 * @exception Exception if the distance metric has not been
 * generated successfully.
 */
public void buildMetric(int numAttributes, String[] options) throws Exception {
  // options are currently ignored; delegate to the basic initializer
  buildMetric(numAttributes);
}

/**
 * Create a new metric for operating on specified instances.
 * Collects dataset-wide token frequencies (for smoothing), converts every
 * instance from raw frequencies to a probability distribution IN PLACE,
 * caches per-instance sum(p log p) norms, and optionally trains the metric.
 * @param data instances that the metric will be used on
 */
public void buildMetric(Instances data) throws Exception {
  m_classIndex = data.classIndex();
  m_numAttributes = data.numAttributes();
  // the class attribute, if present, must be last so that the token
  // attributes occupy a contiguous [0, m_numAttributes) range
  if (m_classIndex != m_numAttributes-1 && m_classIndex != -1) {
    throw new Exception("Class attribute (" + m_classIndex + ") should be the last attribute!!!");
  }
  if (m_classIndex != -1) {
    m_numAttributes--;
  }
  buildMetric(m_numAttributes);

  // hash the dataset-wide frequencies
  m_datasetFrequencies = new HashMapVector();  // # of occurrences of each unique token in dataset
  m_numTotalTokens = 0;                        // total # of (non-unique) tokens in the dataset
  double [] instanceLengths = new double[data.numInstances()];  // num tokens per instance
  for (int i = 0; i < data.numInstances(); i++) {
    Instance instance = data.instance(i);
    for (int j = 0; j < instance.numValues(); j++) {
      Attribute attr = instance.attributeSparse(j);
      int attrIdx = instance.index(j);
      if (attrIdx != m_classIndex) {
        m_datasetFrequencies.increment(attr.name(), instance.value(attr));
        m_numTotalTokens += instance.value(attr);
        instanceLengths[i] += instance.value(attr);
      }
    }
  }

  // convert all instances in the dataset
  System.out.println("\n\nConverting all instances for KL distance\n");
  Instances convertedData = new Instances(data, data.numInstances());
  for (int i = 0; i < data.numInstances(); i++) {
    convertedData.add(convertInstance(data.instance(i)));
    // progress indicator: a dot every 10 instances, a count every 100
    if (i % 10 == 9 ) System.out.print(".");
    if (i % 100 == 99) System.out.println(" " + (i+1));
  }
  System.out.println();

  // copy all converted instances to original data (in-place replacement)
  data.delete();
  for (int i = 0; i < convertedData.numInstances(); i++) {
    data.add(convertedData.instance(i));
  }

  // Hash instance norms: sum(p log p) per instance, keyed by the instance itself
  m_instanceNormHash = new HashMap();
  for (int i = 0; i < data.numInstances(); i++) {
    Instance instance = data.instance(i);
    double norm = 0;
    for (int j = 0; j < instance.numValues(); j++) {
      int attrIdx = instance.index(j);
      if (attrIdx != m_classIndex) {
        double value = instance.value(attrIdx);
        // NOTE(review): value == 0 would yield 0 * -Infinity = NaN here;
        // presumably sparse storage guarantees non-zero values — confirm
        norm += value * Math.log(value);
      }
    }
    m_instanceNormHash.put(instance, new Double(norm));
  }

  if (m_trainable) {
    learnMetric(data);
  }
}

/**
 * Convert an instance of raw token frequencies into a (possibly smoothed)
 * probability distribution over tokens. Returns a new instance; the original
 * is not modified.
 */
public Instance convertInstance(Instance oldInstance) {
  Instance newInstance;
  if (oldInstance instanceof SparseInstance && m_smoothingType == SMOOTHING_UNSMOOTHED) {
    newInstance = new SparseInstance(oldInstance);
  } else {
    // either original data is dense, or smoothing is on - returning dense.
    newInstance = new Instance(oldInstance.numAttributes());
  }
  newInstance.setDataset(oldInstance.dataset());
  int classIdx = oldInstance.classIndex();

  // get the total count of tokens for this instance
  double numTotalTokens = 0;
  for (int i = 0; i < oldInstance.numValues(); i++) {
    int idx = oldInstance.index(i);
    if (idx != classIdx) {
      numTotalTokens += oldInstance.valueSparse(i);
    }
  }

  // we're iterating over newInstance in case
  // there was a transition from sparse to non-sparse.
for (int i = 0; i < newInstance.numValues(); i++) {
  int idx = newInstance.index(i);
  if (idx != classIdx) {
    Attribute attr = newInstance.attribute(idx);
    // replace the raw frequency with its (smoothed) probability
    newInstance.setValue(idx, convertFrequency(oldInstance.value(idx), numTotalTokens, attr.name()));
  }
}
return newInstance;
}

/** Given a frequency of a given token in a document, convert
 * it to a probability value for that document's distribution
 * @param freq frequency of a token
 * @param numTotalTokens total number of tokens in the document
 * @param token the token
 * @return a probability value (-1 if the smoothing method is unknown)
 */
protected double convertFrequency(double freq, double numTotalTokens, String token) {
  double datasetProb = 0;
  switch (m_smoothingType) {
  case SMOOTHING_UNSMOOTHED:
    // plain maximum-likelihood estimate
    return freq/numTotalTokens;
  case SMOOTHING_DIRICHLET:
    // interpolate with the dataset-wide distribution via a pseudo-count prior
    datasetProb = m_datasetFrequencies.getWeight(token) / m_numTotalTokens;
    return (freq + m_pseudoCountDirichlet * datasetProb) / (numTotalTokens + m_pseudoCountDirichlet);
  case SMOOTHING_JELINEK_MERCER:
    // linear interpolation between document and dataset distributions
    datasetProb = m_datasetFrequencies.getWeight(token) / m_numTotalTokens;
    return (1 - m_lambdaJM) * (freq / numTotalTokens) + m_lambdaJM * datasetProb;
  default:
    System.err.println("Unknown smoothing method: " + m_smoothingType);
    return -1;
  }
}

/** Smooth an instance by shrinking each value towards a uniform prior,
 * controlled by m_alpha. The class attribute is NOT skipped here —
 * NOTE(review): verify callers only pass class-free distributions. */
public Instance smoothInstance(Instance instance) {
  int numAttributes = instance.numAttributes();
  double[] values = new double[numAttributes];
  double prior = 1.0/ numAttributes;
  for (int j = 0; j < numAttributes; j++) {
    values[j] = 1.0 / (1 + m_alpha) * (instance.value(j) + m_alpha * prior);
  }
  return new Instance(1.0, values);
}

/**
 * Returns a distance value between two instances.
 * @param instance1 First instance.
 * @param instance2 Second instance.
 * @exception Exception if distance could not be estimated.
*/ public double distance(Instance instance1, Instance instance2) throws Exception { // either pass the computation to the external classifier, or do the work yourself if (m_trainable && m_external && m_trained) { return m_metricLearner.getDistance(instance1, instance2); } else { return distanceInternal(instance1, instance2); } } /** Return the penalty contribution - KL */ public double penalty(Instance instance1, Instance instance2) throws Exception { double distance = distance(instance1, instance2); return distance; } /** Return the penalty contribution - JS */ public double penaltySymmetric(Instance instance1, Instance instance2) throws Exception { double distance = distanceJS(instance1, instance2); return distance; } /** * Returns a distance value between two instances. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if distance could not be estimated. */ public double distanceInternal(Instance instance1, Instance instance2) throws Exception { if (instance1 instanceof SparseInstance) { return distanceSparse((SparseInstance)instance1, instance2); } else { return distanceNonSparse(instance1, instance2); } } /** Returns a distance value between two sparse instances. * @param instance1 First sparse instance. * @param instance2 Second sparse instance. * @exception Exception if distance could not be estimated. 
*/ public double distanceSparse(SparseInstance instance1, Instance instance2) throws Exception { double distance = 0, value1, value2, idivTerm = 0; int numValues1 = instance1.numValues(); boolean dbg = true; // iterate through the attributes that are present in the first instance for (int i = 0; i < numValues1; i++) { int attrIdx = instance1.index(i); if (attrIdx != m_classIndex) { value1 = instance1.valueSparse(i); value2 = instance2.value(attrIdx); if (value2 > 0) { distance += m_attrWeights[attrIdx] * value1 * Math.log(value1/value2); if (m_useIDivergence) { idivTerm -= m_attrWeights[attrIdx] * value1; } // if (dbg) { System.out.println("\t" + attrIdx + "\t" + value1 + " " + value2 + "\t" + distance); dbg= false;} } else { System.err.println("KL.distanceNonSparse: 0 value in instance2, attribute=" + attrIdx + "\n" + instance2.value(attrIdx) + "\n" + instance2); return Double.MAX_VALUE; } } } // if i-divergence is used, need to pick up values of instance2 if (m_useIDivergence) { for (int i = 0; i < instance2.numValues(); i++) { int attrIdx = instance2.index(i); if (attrIdx != m_classIndex) { value2 = instance2.valueSparse(i); idivTerm += m_attrWeights[attrIdx] * value2; } } } distance = distance + idivTerm; return distance; } /** Returns a distance value between non-sparse instances without using the weights * @param instance1 non-sparse instance. * @param instance2 non-sparse instance. * @exception Exception if distance could not be estimated. 
*/ public double distanceNonSparse(Instance instance1, Instance instance2) throws Exception { double distance = 0, idivTerm = 0; double [] values1 = instance1.toDoubleArray(); double [] values2 = instance2.toDoubleArray(); // Go through all attributes for (int i = 0; i < values1.length; i++) { if (i != m_classIndex) { if (values2[i] > 0) { distance += m_attrWeights[i] * (values1[i] * Math.log(values1[i]/values2[i])); if (m_useIDivergence) { idivTerm -= m_attrWeights[i] * (values1[i] - values2[i]); } } else { // instance2 has a 0 value System.err.println("KL.distanceNonSparse: 0 value in instance2, attribute=" + i); return Double.MAX_VALUE; } } } distance = distance + idivTerm; return distance; }; /** * Returns Jensen-Shannon distance value between two instances. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if distanceJS could not be estimated. */ public double distanceJS(Instance instance1, Instance instance2) throws Exception { if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) { return distanceJSSparse((SparseInstance)instance1, (SparseInstance)instance2); } else { return distanceJSNonSparse(instance1, instance2); } } /** Returns Jensen-Shannon distance between two sparse instances. * @param instance1 First sparse instance. * @param instance2 Second sparse instance. * @exception Exception if distanceJS could not be estimated. 
*/
public double distanceJSSparse(SparseInstance instance1, SparseInstance instance2) throws Exception {
  double distanceJS = 0;

  // First try to reuse a cached per-attribute JS "diff vector" for this pair.
  boolean lookupOK = false;
  if (m_instanceConstraintMap.containsKey(instance1)) {
    HashMap instanceDiffInstanceMap = (HashMap) m_instanceConstraintMap.get(instance1);
    if (instanceDiffInstanceMap.containsKey(instance2)) {
      lookupOK = true;
      SparseInstance diffVector = (SparseInstance) instanceDiffInstanceMap.get(instance2);
      // distance = weighted sum of the cached per-attribute JS components
      // NOTE(review): the cached vector also holds a +/-1 component at the class
      // index (see below); this loop does not skip idx == m_classIndex, which
      // looks like it could index past m_attrWeights — verify.
      for (int i = 0; i < diffVector.numValues(); i++) {
        int idx = diffVector.index(i);
        distanceJS += m_attrWeights[idx] * diffVector.valueSparse(i);
      }
    }
  }

  if (!lookupOK) {
    // Compute JS from scratch with a two-pointer merge over the two sorted
    // sparse index lists, building the diff vector for caching as we go.
    double value1, value2, sum1 = 0, sum2 = 0;
    int numValues1 = instance1.numValues();
    int numValues2 = instance2.numValues();
    int maxNumValues = numValues1 + numValues2;  // the overall number of attributes
    double [] attrValues = new double[maxNumValues];
    int [] indices = new int[maxNumValues];
    Arrays.fill(attrValues, 0);
    Arrays.fill(indices, Integer.MAX_VALUE);

    // pick up the values from instance2 that didn't occur in instance1
    int counter = 0, counter1 = 0, counter2 = 0;
    int attrIdx1 = 0, attrIdx2 = 0;
    while (counter1 < numValues1 || counter2 < numValues2) {
      // current attribute index on each side (MAX_VALUE when exhausted)
      if (counter1 < numValues1) {
        attrIdx1 = instance1.index(counter1);
      } else {
        attrIdx1 = Integer.MAX_VALUE;
      }
      if (counter2 < numValues2) {
        attrIdx2 = instance2.index(counter2);
      } else {
        attrIdx2 = Integer.MAX_VALUE;
      }

      // attributes present only in instance1: contribute their mass to sum1
      while (attrIdx1 < attrIdx2 && counter1 < numValues1 ) {
        if (attrIdx1 != m_classIndex) {
          sum1 += m_attrWeights[attrIdx1] * instance1.valueSparse(counter1);
          attrValues[counter] = 0.5 * instance1.valueSparse(counter1);
          indices[counter] = attrIdx1;
          counter++;
        }
        counter1++;
        if (counter1 < numValues1) {
          attrIdx1 = instance1.index(counter1);
        }
      }

      // attributes present only in instance2: contribute their mass to sum2
      while (attrIdx2 < attrIdx1 && counter2 < numValues2 ) {
        if (attrIdx2 != m_classIndex) {
          sum2 += m_attrWeights[attrIdx2] * instance2.valueSparse(counter2);
          attrValues[counter] = 0.5 * instance2.valueSparse(counter2);
          indices[counter] = attrIdx2;
          counter++;
        }
        counter2++;
        if (counter2 < numValues2) {
          attrIdx2 = instance2.index(counter2);
        }
      }

      // attribute present in both: full JS term
      // p1 log p1 + p2 log p2 - (p1+p2) log((p1+p2)/2)
      if (attrIdx1 == attrIdx2 && attrIdx1 != m_classIndex && attrIdx1 < Integer.MAX_VALUE && attrIdx2 < Integer.MAX_VALUE) {
        value1 = instance1.valueSparse(counter1);
        value2 = instance2.valueSparse(counter2);
        distanceJS += m_attrWeights[attrIdx1] * (value1 * Math.log(value1) + value2 * Math.log(value2) - (value1 + value2) * Math.log((value1+value2)/2.0));
        // cached component is scaled by 0.5 and converted to bits (/LOG2)
        attrValues[counter] = 0.5 * (value1 * Math.log(value1) + value2 * Math.log(value2) - (value1 + value2) * Math.log((value1+value2)/2.0)) / LOG2;
        indices[counter] = attrIdx1;
        counter++;
        counter1++;
        counter2++;
      } else if (attrIdx1 == m_classIndex) {
        // class attribute: store +/-1 agreement indicator in the diff vector
        if (instance1.classValue() == instance2.classValue()) {
          attrValues[counter] = 1;
        } else {
          attrValues[counter] = -1;
        }
        indices[counter] = m_classIndex;
        counter++;
        counter1++;
        counter2++;
      }
    }

    SparseInstance diffInstanceJS = new SparseInstance(1.0, attrValues, indices, instance1.dataset().numAttributes());
    diffInstanceJS.setDataset(instance1.dataset());

    // hash the diff-instance for both instances involved (symmetric cache)
    HashMap instanceDiffInstanceMap1;
    if (m_instanceConstraintMap.containsKey(instance1)) {
      instanceDiffInstanceMap1 = (HashMap) m_instanceConstraintMap.get(instance1);
    } else {
      instanceDiffInstanceMap1 = new HashMap();
      m_instanceConstraintMap.put(instance1, instanceDiffInstanceMap1);
    }
    instanceDiffInstanceMap1.put(instance2, diffInstanceJS);

    HashMap instanceDiffInstanceMap2;
    if (m_instanceConstraintMap.containsKey(instance2)) {
      instanceDiffInstanceMap2 = (HashMap) m_instanceConstraintMap.get(instance2);
    } else {
      instanceDiffInstanceMap2 = new HashMap();
      m_instanceConstraintMap.put(instance2, instanceDiffInstanceMap2);
    }
    instanceDiffInstanceMap2.put(instance1, diffInstanceJS);

    // final JS value in bits; should lie in [0,1] for probability distributions
    distanceJS = 0.5 * (sum1 + sum2 + distanceJS / LOG2);
    if (distanceJS > 1.00001) System.out.println("TROUBLE: distanceJS=" + distanceJS + " sum1=" + sum1 + " sum2=" + sum2);
  }
  return distanceJS;
}

/** Returns Jensen-Shannon
distance between a non-sparse instance and a sparse instance * @param instance1 sparse instance. * @param instance2 sparse instance. * @exception Exception if distanceJS could not be estimated. */ public double distanceJSSparseNonSparse(SparseInstance instance1, Instance instance2) throws Exception { double diff, distanceJS = 0, sum2 = 0; int numValues1 = instance1.numValues(); int numValues2 = instance2.numValues(); double [] values2 = instance2.toDoubleArray(); // add all contributions of the second instance; unnecessary ones will be subtracted later for (int i = 0; i < values2.length; i++) { if (i != m_classIndex) { sum2 += m_attrWeights[i] * values2[i]; } } for (int i = 0; i < numValues1; i++) { int attrIdx = instance1.index(i); if (attrIdx != m_classIndex) { double value1 = instance1.valueSparse(i); double value2 = values2[attrIdx]; if (value1 != 0 && value2 != 0) { distanceJS += m_attrWeights[attrIdx] * (value1 * Math.log(value1) + value2 * Math.log(value2) - (value1 + value2) * Math.log((value1+value2)/2.0)); sum2 -= m_attrWeights[attrIdx] * value2; // subtract the contribution previously added } } } distanceJS = 0.5 * (sum2 + distanceJS / LOG2); return distanceJS; } /** Returns Jensen-Shannon distance between non-sparse instances without using the weights * @param instance1 non-sparse instance. * @param instance2 non-sparse instance. * @exception Exception if distanceJS could not be estimated. 
*/ public double distanceJSNonSparse(Instance instance1, Instance instance2) throws Exception { double distanceJS = 0, sum1 = 0, sum2 = 0; double [] values1 = instance1.toDoubleArray(); double [] values2 = instance2.toDoubleArray(); // Go through all attributes for (int i = 0; i < values1.length; i++) { if (i != m_classIndex) { if (values1[i] != 0 && values2[i] != 0) { distanceJS += m_attrWeights[i] * (values1[i] * Math.log(values1[i]) + values2[i] * Math.log(values2[i]) - (values1[i] + values2[i]) * Math.log((values1[i]+values2[i])/2.0)); } else if (values1[i] != 0) { sum1 += m_attrWeights[i] * values1[i]; } else if (values2[i] != 0) { sum2 += m_attrWeights[i] * values2[i]; } } } distanceJS = 0.5 * (sum1 + sum2 + distanceJS / LOG2); return distanceJS; }; /** * Returns a similarity estimate between two instances. Similarity is obtained by * inverting the distance value using one of three methods: * CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public double similarity(Instance instance1, Instance instance2) throws Exception { switch (m_conversionType) { case CONVERSION_LAPLACIAN: return 1 / (1 + distance(instance1, instance2)); case CONVERSION_UNIT: return 2 * (1 - distance(instance1, instance2)); case CONVERSION_EXPONENTIAL: return Math.exp(-distance(instance1, instance2)); default: throw new Exception ("Unknown distance to similarity conversion method"); } } /** * Returns distance between two instances without using the weights. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public double distanceNonWeighted(Instance instance1, Instance instance2) throws Exception { return distance(instance1, instance2); } /** * Returns a similarity estimate between two instances without using the weights. * @param instance1 First instance. 
* @param instance2 Second instance.
 * @exception Exception if similarity could not be estimated.
 */
public double similarityNonWeighted(Instance instance1, Instance instance2) throws Exception {
  switch (m_conversionType) {
  case CONVERSION_LAPLACIAN:
    return 1 / (1 + distanceNonWeighted(instance1, instance2));
  case CONVERSION_UNIT:
    // NOTE(review): scaled by 2 despite the "1-distance" tag label — preserved
    return 2 * (1 - distanceNonWeighted(instance1, instance2));
  case CONVERSION_EXPONENTIAL:
    return Math.exp(-distanceNonWeighted(instance1, instance2));
  default:
    throw new Exception ("Unknown distance to similarity conversion method");
  }
}

/** Get the values of the partial derivatives for the metric components
 * for a particular instance pair
 * @param instance1 the first instance
 * @param instance2 the second instance
 */
public double[] getGradients(Instance instance1, Instance instance2) throws Exception {
  double[] gradients = new double[m_numAttributes];
  double distance = distanceInternal(instance1, instance2);

  // gradients are zero for 0-distance instances
  if (distance == 0) {
    return gradients;
  }

  // take care of SparseInstances by enumerating over the values of the first instance
  // NOTE(review): the loop runs to m_numAttributes but indexes with
  // valueSparse/attributeSparse, which take positions in the sparse value
  // list; for a sparse instance1 with fewer stored values than
  // m_numAttributes this looks out of bounds — verify against callers.
  for (int i = 0; i < m_numAttributes; i++) {
    // get the values
    double val1 = instance1.valueSparse(i);
    Attribute attr = instance1.attributeSparse(i);
    double val2 = instance2.value(attr);
    // TODO: why was this gradient used earlier??
    // gradients[i] = 1.0 / (2*distance) * (val2 - val1) * (val2 - val1);
    if (val2 > 0) {
      // per-attribute I-divergence contribution
      gradients[i] = val1 * Math.log(val1/val2) - (val1 - val2);
    }
  }
  return gradients;
}

/** get the normalizer value */
public double getNormalizer() {
  return 0;
}

/** Train the metric via the configured metric learner.
 * NOTE(review): the two diagnostic strings below contain typos
 * ("initalized", "IDvergence"); left byte-identical here. */
public void learnMetric (Instances data) throws Exception {
  if (m_metricLearner == null) {
    System.err.println("Metric learner for KL is not initalized. No training was conducted");
    return;
  }
  // need Idivergence to be switched on during metric learning
  if (m_useIDivergence == false) {
    System.out.println("Using IDvergence ...");
    m_useIDivergence = true;
  }
  m_metricLearner.trainMetric(this, data);
}

/**
 * Set the distance metric learner
 *
 * @param metricLearner the metric learner
 */
public void setMetricLearner (MetricLearner metricLearner) {
  m_metricLearner = metricLearner;
}

/**
 * Get the distance metric learner
 *
 * @return the distance metric learner that this metric employs
 */
public MetricLearner getMetricLearner () {
  return m_metricLearner;
}

/**
 * Create an instance with features corresponding to dot-product components of the two given instances
 * NOTE(review): when only instance2 is sparse the arguments are swapped,
 * which reverses the (asymmetric) KL direction — verify this is intended.
 * @param instance1 first instance
 * @param instance2 second instance
 */
public Instance createDiffInstance (Instance instance1, Instance instance2) {
  if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
    return createDiffInstanceSparse((SparseInstance)instance1, (SparseInstance)instance2);
  } else if (instance1 instanceof SparseInstance) {
    return createDiffInstanceSparseNonSparse((SparseInstance)instance1, instance2);
  } else if (instance2 instanceof SparseInstance) {
    return createDiffInstanceSparseNonSparse((SparseInstance)instance2, instance1);
  } else {
    return createDiffInstanceNonSparse(instance1, instance2);
  }
}

/**
 * Create a sparse instance with features corresponding to dot-product components of the two given instances
 * @param instance1 first sparse instance
 * @param instance2 second sparse instance
 */
protected SparseInstance createDiffInstanceSparse (SparseInstance instance1, SparseInstance instance2) {
  int numValues1 = instance1.numValues();
  int numValues2 = instance2.numValues();
  int maxNumValues = numValues1 + numValues2;  // the overall number of attributes
  int classIndex = instance1.classIndex();

  int counter = 0;
  double [] attrValues = new double[maxNumValues];
  int [] indices = new int[maxNumValues];
  Arrays.fill(attrValues, 0);
Arrays.fill(indices, Integer.MAX_VALUE);

for (int i = 0; i < numValues1; i++) {
  int attrIdx = instance1.index(i);
  indices[counter] = attrIdx;
  if (attrIdx != classIndex) { // skip class attributes
    double value1 = instance1.valueSparse(i);
    int idx2 = instance2.locateIndex(attrIdx);
    // does instance2 also have a value at this attribute?
    if (idx2 >=0 && attrIdx == instance2.index(idx2)) {
      double value2 = instance2.valueSparse(idx2);
      if (value2 > 0) {
        // per-attribute KL component in bits
        attrValues[counter] = value1 * Math.log(value1/value2) / LOG2;
        if (m_useIDivergence) {
          attrValues[counter] += value2;
        }
      }
    }
    if (m_useIDivergence) {
      attrValues[counter] -= value1;
    }
  } else { // add the class value: +/-1 agreement indicator
    if (instance1.classValue() == instance2.classValue()) {
      attrValues[counter] = 1;
    } else {
      attrValues[counter] = -1;
    }
  }
  counter++;
}
SparseInstance diffInstance = new SparseInstance(1.0, attrValues, indices, instance1.dataset().numAttributes());
diffInstance.setDataset(instance1.dataset());

// if i-divergence is used, need to pick up "orphan" values of instance2
if (m_useIDivergence) {
  for (int i = 0; i < numValues2; i++) {
    int attrIdx = instance2.index(i);
    if (attrIdx != m_classIndex) {
      double value2 = instance2.valueSparse(i);
      int idx1 = instance1.locateIndex(attrIdx);
      if (idx1 < 0 || attrIdx != instance1.index(idx1)) {
        diffInstance.setValue(attrIdx, value2);
      }
    }
  }
}
return diffInstance;
}

/**
 * Create an instance with features corresponding to dot-product components of the two given instances
 * @param instance1 first sparse instance
 * @param instance2 second non-sparse instance
 */
protected Instance createDiffInstanceSparseNonSparse (SparseInstance instance1, Instance instance2) {
  int numValues1 = instance1.numValues();
  int numValues2 = instance2.numValues();
  Instance diffInstance = null;
  double[] values2 = instance2.toDoubleArray();
  int classIndex = instance1.classIndex();

  if (m_useIDivergence) {
    diffInstance = new Instance(instance2);  // "orphan values" from instance2 will be copied here
    for (int i = 0; i < numValues1; i++) {
      int idx = instance1.index(i);
      if (idx != classIndex) { // skip class attributes
        double value1 = instance1.valueSparse(i);
        if (values2[idx] != 0) {
          // KL component in bits minus the I-divergence value1 term
          diffInstance.setValue(idx, value1 * Math.log(value1/values2[idx]) / LOG2 - value1);
        } else {
          System.err.println("\n\n\nPROBLEM! Zero value in non-sparse instance in createDiffInstanceSparseNonSparse\n\n\n");
          diffInstance.setValue(idx, -value1);
        }
      } else { // add the class value
        if (instance1.classValue() == instance2.classValue()) {
          diffInstance.setClassValue(1);
        } else {
          diffInstance.setClassValue(-1);
        }
      }
    }
  } else { // I-Divergence term is not used
    int counter = 0;
    double [] attrValues = new double[numValues1];
    int [] indices = new int[numValues1];
    Arrays.fill(attrValues, 0);
    Arrays.fill(indices, Integer.MAX_VALUE);
    for (int i = 0; i < numValues1; i++) {
      int attrIdx = instance1.index(i);
      indices[counter] = attrIdx;
      if (attrIdx != classIndex) { // skip class attributes
        double value1 = instance1.valueSparse(i);
        if (values2[attrIdx] > 0) {
          // NOTE(review): not divided by LOG2 here, unlike the branch above — verify
          attrValues[counter] = value1 * Math.log(value1/values2[attrIdx]);
        }
      } else { // add the class value
        if (instance1.classValue() == instance2.classValue()) {
          attrValues[counter] = 1;
        } else {
          attrValues[counter] = -1;
        }
      }
      counter++;
    }
    diffInstance = new SparseInstance(1.0, attrValues, indices, instance1.dataset().numAttributes());
  }
  diffInstance.setDataset(instance1.dataset());
  return diffInstance;
}

/**
 * Create a nonsparse instance with features corresponding to dot-product components of the two given instances
 * @param instance1 first nonsparse instance
 * @param instance2 second nonsparse instance
 */
protected Instance createDiffInstanceNonSparse (Instance instance1, Instance instance2) {
  double[] values1 = instance1.toDoubleArray();
  double[] values2 = instance2.toDoubleArray();
  int numAttributes = values1.length;

  // create an extra attribute if there was no class index originally
  int classIndex = instance1.classIndex();
  if (classIndex < 0) {
    classIndex = numAttributes;
    numAttributes++;
  }
  double[] diffInstanceValues = new
double[numAttributes];

for (int i = 0; i < values1.length; i++) {
  if (i != classIndex) {
    // round up to float significance to be able to weed out duplicates later
    if (values1[i] != 0 && values2[i] != 0) {
      // per-attribute KL component in bits
      diffInstanceValues[i] = values1[i] * Math.log(values1[i]/values2[i]) / LOG2;
      if (m_useIDivergence) {
        diffInstanceValues[i] += values2[i] - values1[i];
      }
    } else if (values2[i] != 0) {
      diffInstanceValues[i] = values2[i];
    } else if (values1[i] != 0) {
      // instance2 has no mass here: component is unbounded
      diffInstanceValues[i] = Double.MAX_VALUE;
    }
  } else { // class value: +/-1 agreement indicator
    if (values1[i] == values2[i]) {
      diffInstanceValues[i] = 1;
    } else {
      diffInstanceValues[i] = -1;
    }
  }
}
Instance diffInstance = new Instance(1.0, diffInstanceValues);
diffInstance.setDataset(instance1.dataset());
return diffInstance;
}

/**
 * Create an instance with features corresponding to JS components
 * @param instance1 first instance
 * @param instance2 second instance
 */
public Instance createDiffInstanceJS (Instance instance1, Instance instance2) {
  if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
    return createDiffInstanceJSSparse((SparseInstance)instance1, (SparseInstance)instance2);
  } else if (instance1 instanceof SparseInstance) {
    return createDiffInstanceJSSparseNonSparse((SparseInstance)instance1, instance2);
  } else if (instance2 instanceof SparseInstance) {
    return createDiffInstanceJSSparseNonSparse((SparseInstance)instance2, instance1);
  } else {
    return createDiffInstanceJSNonSparse(instance1, instance2);
  }
}

/**
 * Create a sparse instance with features corresponding to dot-product components of the two given instances
 * @param instance1 first sparse instance
 * @param instance2 second sparse instance
 */
protected SparseInstance createDiffInstanceJSSparse (SparseInstance instance1, SparseInstance instance2) {
  SparseInstance diffInstanceJS = null;

  // try to look up this constraint in the hash (filled by distanceJSSparse)
  if (m_instanceConstraintMap.containsKey(instance1)) {
    HashMap instanceDiffInstanceMap = (HashMap) m_instanceConstraintMap.get(instance1);
    if (instanceDiffInstanceMap.containsKey(instance2)) {
      diffInstanceJS = (SparseInstance) instanceDiffInstanceMap.get(instance2);
    }
  }

  // if the lookup failed, compute it and hash it
  // (duplicates the two-pointer merge in distanceJSSparse, minus the sums)
  if (diffInstanceJS == null) {
    System.err.println("\n\n\nThis should not happen! Could not look up instance1 in createDiffInstanceJSSparse!\n\n\n\n");
    int numValues1 = instance1.numValues();
    int numValues2 = instance2.numValues();
    int maxNumValues = numValues1 + numValues2;  // the overall number of attributes
    double [] attrValues = new double[maxNumValues];
    int [] indices = new int[maxNumValues];
    Arrays.fill(attrValues, 0);
    Arrays.fill(indices, Integer.MAX_VALUE);
    int classIndex = instance1.classIndex();
    int counter = 0, counter1 = 0, counter2 = 0, attrIdx1, attrIdx2;

    while (counter1 < numValues1 || counter2 < numValues2) {
      // current attribute index on each side (MAX_VALUE when exhausted)
      if (counter1 < numValues1) {
        attrIdx1 = instance1.index(counter1);
      } else {
        attrIdx1 = Integer.MAX_VALUE;
      }
      if (counter2 < numValues2) {
        attrIdx2 = instance2.index(counter2);
      } else {
        attrIdx2 = Integer.MAX_VALUE;
      }

      // attributes present only in instance1
      while (attrIdx1 < attrIdx2 && counter1 < numValues1) {
        if (attrIdx1 != m_classIndex) {
          attrValues[counter] = 0.5 * instance1.valueSparse(counter1);
          indices[counter] = attrIdx1;
          counter++;
        }
        counter1++;
        if (counter1 < numValues1) {
          attrIdx1 = instance1.index(counter1);
        }
      }

      // attributes present only in instance2
      while (attrIdx2 < attrIdx1 && counter2 < numValues2) {
        if (attrIdx2 != m_classIndex) {
          attrValues[counter] = 0.5 * instance2.valueSparse(counter2);
          indices[counter] = attrIdx2;
          counter++;
        }
        counter2++;
        if (counter2 < numValues2) {
          attrIdx2 = instance2.index(counter2);
        }
      }

      // attribute present in both: full JS component, scaled to bits
      if (attrIdx1 == attrIdx2 && attrIdx1 != m_classIndex && attrIdx1 < Integer.MAX_VALUE && attrIdx2 < Integer.MAX_VALUE) {
        double value1 = instance1.valueSparse(counter1);
        double value2 = instance2.valueSparse(counter2);
        attrValues[counter] = 0.5 * (value1 * Math.log(value1) + value2 * Math.log(value2) - (value1 + value2) * Math.log((value1+value2)/2.0)) / LOG2;
        indices[counter] =
attrIdx1; counter++; counter1++; counter2++; } else if (attrIdx1 == m_classIndex) { if (instance1.classValue() == instance2.classValue()) { attrValues[counter] = 1; } else { attrValues[counter] = -1; } indices[counter] = m_classIndex; counter++; counter1++; counter2++; } } diffInstanceJS = new SparseInstance(1.0, attrValues, indices, instance1.dataset().numAttributes()); diffInstanceJS.setDataset(instance1.dataset()); // hash the diff-instance for both instances involved HashMap instanceDiffInstanceMap1; if (m_instanceConstraintMap.containsKey(instance1)) { instanceDiffInstanceMap1 = (HashMap) m_instanceConstraintMap.get(instance1); } else { instanceDiffInstanceMap1 = new HashMap(); m_instanceConstraintMap.put(instance1, instanceDiffInstanceMap1); } instanceDiffInstanceMap1.put(instance2, diffInstanceJS); HashMap instanceDiffInstanceMap2; if (m_instanceConstraintMap.containsKey(instance2)) { instanceDiffInstanceMap2 = (HashMap) m_instanceConstraintMap.get(instance2); } else { instanceDiffInstanceMap2 = new HashMap(); m_instanceConstraintMap.put(instance2, instanceDiffInstanceMap2); } instanceDiffInstanceMap2.put(instance1, diffInstanceJS); } return diffInstanceJS; } /** * Create an instance with features corresponding to dot-product components of the two given instances * @param instance1 first sparse instance * @param instance2 second non-sparse instance */ protected Instance createDiffInstanceJSSparseNonSparse (SparseInstance instance1, Instance instance2) { int numValues = instance2.numValues(); int classIndex = instance1.classIndex(); double [] attrValues = new double[numValues]; double[] values2 = instance2.toDoubleArray(); // add all contributions of the second instance; unnecessary ones will overwritten later for (int i = 0; i < values2.length; i++) { if (i != m_classIndex) { attrValues[i] = 0.5 * values2[i]; } } for (int i = 0; i < instance1.numValues(); i++) { Attribute attribute = instance1.attributeSparse(i); int attrIdx = attribute.index(); if (attrIdx 
!= m_classIndex) {
      double value1 = instance1.value(attrIdx);
      double value2 = values2[attrIdx];
      if (value1 != 0 && value2 != 0) {
        // both non-zero: Jensen-Shannon component, converted to bits via LOG2
        attrValues[attrIdx] = 0.5 * (value1 * Math.log(value1) + value2 * Math.log(value2) - (value1 + value2) * Math.log((value1+value2)/2.0)) / LOG2;
      } else if (value1 != 0) {
        attrValues[attrIdx] = 0.5 * value1;
      } else if (value2 != 0) {
        attrValues[attrIdx] = 0.5 * value2;
      }
    } else { // class index
      // encode class agreement as +1 (same class) / -1 (different class)
      if (instance1.classValue() == instance2.classValue()) {
        attrValues[attrIdx] = 1;
      } else {
        attrValues[attrIdx] = -1;
      }
    }
  }
  Instance diffInstanceJS = new Instance(1.0, attrValues);
  diffInstanceJS.setDataset(instance1.dataset());
  return diffInstanceJS;
}

/**
 * Create a nonsparse instance with features corresponding to dot-product
 * components of the two given instances.
 * @param instance1 first nonsparse instance
 * @param instance2 second nonsparse instance
 */
protected Instance createDiffInstanceJSNonSparse (Instance instance1, Instance instance2) {
  double[] values1 = instance1.toDoubleArray();
  double[] values2 = instance2.toDoubleArray();
  int numAttributes = values1.length;
  // create an extra attribute if there was no class index originally
  int classIndex = instance1.classIndex();
  if (classIndex < 0) {
    classIndex = numAttributes;
    numAttributes++;
  }
  double[] diffInstanceJSValues = new double[numAttributes];
  for (int i = 0; i < values1.length; i++) {
    if (i != classIndex) {
      // round up to float significance to be able to weed out duplicates later
      if (values2[i] != 0 && values1[i] != 0) {
        // both non-zero: Jensen-Shannon component, in bits
        diffInstanceJSValues[i] = 0.5 * (values1[i] * Math.log(values1[i]) + values2[i] * Math.log(values2[i]) - (values1[i] + values2[i]) * Math.log((values1[i]+values2[i])/2.0)) / LOG2;
      } else if (values1[i] != 0) {
        diffInstanceJSValues[i] = 0.5 * values1[i];
      } else if (values2[i] != 0) {
        diffInstanceJSValues[i] = 0.5 * values2[i];
      }
    } else { // class value
      // encode class agreement as +1 / -1
      if (values1[i] == values2[i]) {
        diffInstanceJSValues[i] = 1;
      } else {
        diffInstanceJSValues[i] = -1;
      }
    }
  }
  Instance diffInstanceJS = new Instance(1.0, diffInstanceJSValues);
  diffInstanceJS.setDataset(instance1.dataset());
  return diffInstanceJS;
}

/**
 * Set the type of distance to similarity conversion.  Values other than
 * CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL will be
 * silently ignored (the tag set must be TAGS_CONVERSION).
 * @param conversionType type of the similarity to distance conversion to use
 */
public void setConversionType(SelectedTag conversionType) {
  if (conversionType.getTags() == TAGS_CONVERSION) {
    m_conversionType = conversionType.getSelectedTag().getID();
  }
}

/**
 * Return the type of distance to similarity conversion.
 * @return one of CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL
 */
public SelectedTag getConversionType() {
  return new SelectedTag(m_conversionType, TAGS_CONVERSION);
}

/**
 * Set the type of smoothing.  Tags outside TAGS_SMOOTHING are silently ignored.
 * @param smoothingType type of smoothing
 */
public void setSmoothingType(SelectedTag smoothingType) {
  if (smoothingType.getTags() == TAGS_SMOOTHING) {
    m_smoothingType = smoothingType.getSelectedTag().getID();
  }
}

/**
 * Return the type of smoothing.
 * @return one of SMOOTHING_UNSMOOTHED, SMOOTHING_DIRICHLET, SMOOTHING_JELINEK_MERCER
 */
public SelectedTag getSmoothingType() {
  return new SelectedTag(m_smoothingType, TAGS_SMOOTHING);
}

/** Set the pseudo-count value for Dirichlet smoothing
 * @param pseudoCountDirichlet the pseudocount value
 */
public void setPseudoCountDirichlet(double pseudoCountDirichlet) {
  m_pseudoCountDirichlet = pseudoCountDirichlet;
}

/** Get the pseudo-count value for Dirichlet smoothing
 * @return the pseudocount value
 */
public double getPseudoCountDirichlet() {
  return m_pseudoCountDirichlet;
}

/** Set the lambda parameter for Jelinek-Mercer smoothing
 * @param lambdaJM the interpolation weight lambda
 */
public void setLambdaJM(double lambdaJM) {
  m_lambdaJM = lambdaJM;
}

/** Get the lambda parameter for Jelinek-Mercer smoothing
 * @return lambda
 */
public double getLambdaJM() {
  return m_lambdaJM;
}

/** The computation of a metric can be either based on distance, or on similarity
 * @returns
 true because this metric fundamentally computes distance (KL divergence) */
public boolean isDistanceBased() {
  return true;
}

/** Switch between regular KL divergence and I-divergence */
public void setUseIDivergence(boolean useID) {
  m_useIDivergence = useID;
}

/** Check whether regular KL divergence or I-divergence is used */
public boolean getUseIDivergence() {
  return m_useIDivergence;
}

/**
 * Given a cluster of instances, return the centroid of that cluster.
 * @param instances objects belonging to a cluster
 * @param fastMode whether fast mode should be used for SparseInstances
 * @param normalized normalize centroids for SPKMeans
 * @return a centroid instance for the given cluster
 */
public Instance getCentroidInstance(Instances instances, boolean fastMode, boolean normalized) {
  double [] values = new double[instances.numAttributes()];
  if (fastMode) {
    values = meanOrMode(instances); // uses fast meanOrMode
  } else {
    for (int j = 0; j < instances.numAttributes(); j++) {
      values[j] = instances.meanOrMode(j); // uses usual meanOrMode
    }
  }
  Instance centroid = new Instance(1.0, values); // cluster centroids are dense in SPKMeans
  if (normalized) {
    try {
      normalizeInstanceWeighted(centroid);
    } catch (Exception e) {
      // NOTE(review): normalization failure is only logged; the centroid is
      // returned un-normalized — confirm callers tolerate that.
      e.printStackTrace();
    }
  }
  return centroid;
}

/**
 * Parses a given list of options.
Valid options are:<p> * * -N <br> * Normalize the euclidean distance by vectors lengths * * -E <br> * Use exponential conversion from distance to similarity * (default laplacian conversion) <p> * * -U <br> * Use unit conversion from similarity to distance (dist=1-sim) * (default laplacian conversion) <p> * * -R <br> * The metric is trainable and will be trained using the current MetricLearner * (default non-trainable) * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { if (Utils.getFlag('E', options)) { setConversionType(new SelectedTag(CONVERSION_EXPONENTIAL, TAGS_CONVERSION)); } else if (Utils.getFlag('U', options)) { setConversionType(new SelectedTag(CONVERSION_UNIT, TAGS_CONVERSION)); } else { setConversionType(new SelectedTag(CONVERSION_LAPLACIAN, TAGS_CONVERSION)); } if (Utils.getFlag('R', options)) { setTrainable(Utils.getFlag('R', options)); setExternal(Utils.getFlag('X', options)); String metricLearnerString = Utils.getOption('L', options); if (metricLearnerString.length() != 0) { String [] metricLearnerSpec = Utils.splitOptions(metricLearnerString); String metricLearnerName = metricLearnerSpec[0]; metricLearnerSpec[0] = ""; System.out.println("Got metric learner " + metricLearnerName + " spec: " + metricLearnerSpec); setMetricLearner(MetricLearner.forName(metricLearnerName, metricLearnerSpec)); } } Utils.checkForRemainingOptions(options); } /** * Gets the classifier specification string, which contains the class name of * the classifier and any options to the classifier * * @return the classifier string. */ protected String getMetricLearnerSpec() { if (m_metricLearner instanceof OptionHandler) { return m_metricLearner.getClass().getName() + " " + Utils.joinOptions(((OptionHandler)m_metricLearner).getOptions()); } return m_metricLearner.getClass().getName(); } /** * Returns an enumeration describing the available options. 
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  // NOTE(review): vector is sized 4 but 6 options are added; Vector grows
  // automatically so this is harmless, just misleading.
  Vector newVector = new Vector(4);
  newVector.addElement(new Option("\tNormalize the euclidean distance by vectors lengths\n", "N", 0, "-N"));
  newVector.addElement(new Option("\tUse exponential conversion from similarity to distance\n", "E", 0, "-E"));
  newVector.addElement(new Option("\tUse unit conversion from similarity to distance\n", "U", 0, "-U"));
  newVector.addElement(new Option("\tTrain the metric\n", "R", 0, "-R"));
  newVector.addElement(new Option("\tUse the metric learner for similarity calculations(\"external\")", "X", 0, "-X"));
  newVector.addElement(new Option(
    "\tFull class name of metric learner to use, followed\n"
    + "\tby scheme options. (required)\n"
    + "\teg: \"weka.core.metrics.ClassifierMetricLearner -B weka.classifiers.function.SMO\"",
    "L", 1, "-L <classifier specification>"));
  return newVector.elements();
}

/**
 * Gets the current settings of KLP.
 *
 * NOTE(review): several options emitted here (-I, -D, -J, -S) are not parsed
 * by setOptions(), and "-R" is emitted twice with different meanings (alpha
 * decay rate vs. trainable flag) — see the collision comment below.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String [] getOptions() {
  String [] options = new String [55];
  int current = 0;
  if (m_useIDivergence) {
    options[current++] = "-I";
  }
  if (m_conversionType == CONVERSION_EXPONENTIAL) {
    options[current++] = "-E";
  } else if (m_conversionType == CONVERSION_UNIT) {
    options[current++] = "-U";
  }
  if (m_smoothingType == SMOOTHING_DIRICHLET) {
    options[current++] = "-D";
    options[current++] = "" + m_pseudoCountDirichlet;
  } else if (m_smoothingType == SMOOTHING_JELINEK_MERCER) {
    options[current++] = "-J";
    options[current++] = "" + m_lambdaJM;
  }
  if (m_useSmoothing) {
    options[current++] = "-S";
    options[current++] = "" + m_alpha;
    // NOTE(review): "-R" here carries the alpha decay rate, but "-R" below is
    // the trainable flag — option-letter collision; confirm and pick a
    // distinct letter for one of them.
    options[current++] = "-R";
    options[current++] = "" + m_alphaDecayRate;
  }
  if (m_trainable) {
    options[current++] = "-R";
    if (m_external) {
      options[current++] = "-X";
    }
    options[current++] = "-L";
    options[current++] = Utils.removeSubstring(m_metricLearner.getClass().getName(), "weka.core.metrics.");
    if (m_metricLearner instanceof OptionHandler) {
      String[] metricOptions = ((OptionHandler)m_metricLearner).getOptions();
      for (int i = 0; i < metricOptions.length; i++) {
        options[current++] = metricOptions[i];
      }
    }
  }
  // pad the remainder of the fixed-size array with empty strings
  while (current < options.length) {
    options[current++] = "";
  }
  return options;
}

/** Create a copy of this metric */
public Object clone() {
  KL m = null;
  m = (KL) super.clone();
  // clone the fields
  // for now clone a metric learner via serialization; TODO: implement proper cloning in MetricLearners
  System.out.println("New alpha=" + m_alpha);
  try {
    SerializedObject so = new SerializedObject(m_metricLearner);
    m.m_metricLearner = (MetricLearner) so.getObject();
  } catch (Exception e) {
    // NOTE(review): on failure the clone silently shares the original's
    // metric learner reference — confirm that is acceptable.
    System.err.println("Problems cloning m_metricLearner while cloning KL");
  }
  return m;
}

/**
 * Ad-hoc test driver: loads /tmp/INST.arff, treats its first two instances as
 * "new" and "old" centroids, computes a mean ("sane") centroid over the rest,
 * and prints per-instance and total distances to all three.
 * NOTE(review): depends on a hard-coded /tmp path — developer scaffolding only.
 */
public static void main(String[] args) {
  try {
    // (A long commented-out demo that built a small dataset of sparse and
    // non-sparse instances and exercised distanceJS / similarity /
    // createDiffInstance was removed for readability; see version control
    // history if it is needed again.)
    Instances instances = new Instances(new FileReader ("/tmp/INST.arff"));
    KL metric = new KL();
    metric.buildMetric(instances);
    Instance newCentroid = instances.instance(0);
    Instance oldCentroid = instances.instance(1);
    // delete(0) twice removes both centroid rows from the working set
    instances.delete(0);
    instances.delete(0);
    metric.setSmoothingType(new SelectedTag(SMOOTHING_DIRICHLET, TAGS_SMOOTHING));
    metric.setUseIDivergence(true);
    // accumulate per-attribute sums, then divide to get the mean centroid
    double [] values = new double[instances.numAttributes()];
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance instance = instances.instance(i);
      for (int j = 0; j < instance.numAttributes(); j++) {
        values[j] += instance.value(j);
      }
    }
    for (int j = 0; j < instances.numAttributes(); j++) {
      values[j] /= instances.numInstances();
    }
    Instance saneCentroid = new SparseInstance(1.0, values);
    System.out.println("NumInstances=" + instances.numInstances());
    double prevTotal=0, currTotal=0, saneTotal=0;
    for (int i = 0; i < instances.numInstances(); i++) {
      double prevPen = metric.distanceNonSparse(instances.instance(i), oldCentroid);
      double currPen = metric.distanceNonSparse(instances.instance(i), newCentroid);
      double sanePen = metric.distanceNonSparse(instances.instance(i), saneCentroid);
      prevTotal += prevPen;
      currTotal += currPen;
      saneTotal += sanePen;
      System.out.println(prevPen + " -> " + currPen + "\t" + sanePen);
    }
    System.out.println("====\n" + prevTotal + "\t" + currTotal + "\t" + saneTotal);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
}