/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ClusterUtils.java
* Copyright (C) 2003 Sugato Basu and Misha Bilenko
*
*/
package weka.clusterers;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
 * Static helper routines shared by clustering code: L2 normalization of
 * instances, normalization of weight arrays, weighted means, instance sums
 * and a simple timestamp.
 */
public class ClusterUtils {

  /**
   * Normalizes an Instance or SparseInstance to unit L2 length, picking the
   * dense or sparse routine from the runtime type of the argument.
   *
   * @author Sugato Basu
   * @param inst Instance to be normalized (updated in place)
   * @throws Exception if one of the underlying routines fails
   */
  public static void normalize(Instance inst) throws Exception {
    if (inst instanceof SparseInstance) {
      normalizeSparseInstance(inst);
    } else {
      normalizeInstance(inst);
    }
  }

  /**
   * Normalizes the values of a dense Instance in L2 norm. The class
   * attribute is neither counted in the norm nor rescaled. An instance whose
   * norm is exactly zero is left untouched instead of being filled with NaN.
   *
   * If a SparseInstance is passed in, a warning is printed and the work is
   * handed to {@link #normalizeSparseInstance(Instance)}.
   *
   * @author Sugato Basu
   * @param inst Instance to be normalized (updated in place)
   * @throws Exception if the delegated sparse normalization fails
   */
  public static void normalizeInstance(Instance inst) throws Exception {
    if (inst instanceof SparseInstance) {
      System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");
      normalizeSparseInstance(inst);
      // The sparse routine has done the whole job; falling through would
      // normalize a second time and write a dense array into a sparse instance.
      return;
    }

    double[] values = inst.toDoubleArray();
    int classIndex = inst.classIndex();

    double sumOfSquares = 0;
    for (int i = 0; i < values.length; i++) {
      if (i != classIndex) { // the class value is not part of the vector
        sumOfSquares += values[i] * values[i];
      }
    }
    double norm = Math.sqrt(sumOfSquares);
    if (norm == 0) {
      return; // all-zero vector: dividing would only produce NaN
    }

    for (int i = 0; i < values.length; i++) {
      if (i != classIndex) { // don't rescale the class value
        values[i] /= norm;
      }
    }
    inst.setValueArray(values);
  }

  /**
   * Normalizes the values of a SparseInstance in L2 norm, visiting only the
   * explicitly stored entries. The class attribute is neither counted in the
   * norm nor rescaled. An instance whose norm is exactly zero is left
   * untouched instead of being filled with NaN.
   *
   * If a non-sparse Instance is passed in, a warning is printed and the work
   * is handed to {@link #normalizeInstance(Instance)}.
   *
   * @author Sugato Basu
   * @param inst SparseInstance to be normalized (updated in place)
   * @throws Exception if the delegated dense normalization fails
   */
  public static void normalizeSparseInstance(Instance inst) throws Exception {
    if (!(inst instanceof SparseInstance)) {
      System.err.println("Not SparseInstance, using normalizeInstance function instead");
      normalizeInstance(inst);
      return; // already normalized by the dense routine
    }

    int numStored = inst.numValues();
    int classIndex = inst.classIndex();

    double sumOfSquares = 0;
    for (int i = 0; i < numStored; i++) {
      if (inst.index(i) != classIndex) { // the class value is not part of the vector
        double v = inst.valueSparse(i);
        sumOfSquares += v * v;
      }
    }
    double norm = Math.sqrt(sumOfSquares);
    if (norm == 0) {
      return; // all-zero vector: dividing would only produce NaN
    }

    for (int i = 0; i < numStored; i++) {
      if (inst.index(i) != classIndex) { // don't rescale the class value
        inst.setValueSparse(i, inst.valueSparse(i) / norm);
      }
    }
  }

  /**
   * Normalizes an array of doubles so that its entries sum to one, by
   * dividing every entry by the plain (signed) sum of all entries. If the sum
   * is zero the array is left as it is.
   *
   * @param weights the array to normalize (updated in place)
   * @return the same array object that was passed in
   */
  public static double[] normalize(double[] weights) {
    double sum = 0;
    for (int i = 0; i < weights.length; i++) {
      sum += weights[i];
    }
    if (sum != 0) {
      for (int i = 0; i < weights.length; i++) {
        weights[i] = weights[i] / sum;
      }
    }
    return weights;
  }

  /**
   * Fast version of meanOrMode - streamlined from Instances.meanOrMode for
   * efficiency. Computes the weighted mean of every attribute.
   * Does not check for missing attributes and assumes numeric attributes.
   * Instances are read through the stored-value accessors
   * (numValues / index / valueSparse), so no cast to SparseInstance is needed.
   *
   * @param insts the instances to average
   * @return one weighted mean per attribute; all zeros when the total
   *         instance weight is (numerically) zero
   */
  public static double[] meanOrMode(Instances insts) {
    int numAttributes = insts.numAttributes();
    double[] value = new double[numAttributes]; // starts out as all zeros
    double totalWeight = 0;

    int numInstances = insts.numInstances();
    for (int j = 0; j < numInstances; j++) {
      Instance inst = insts.instance(j);
      double w = inst.weight();
      totalWeight += w;
      int numStored = inst.numValues();
      for (int i = 0; i < numStored; i++) {
        value[inst.index(i)] += w * inst.valueSparse(i);
      }
    }

    if (Utils.eq(totalWeight, 0)) {
      // No usable weight: report a zero vector rather than dividing by ~0.
      Arrays.fill(value, 0);
    } else {
      for (int k = 0; k < numAttributes; k++) {
        value[k] = value[k] / totalWeight;
      }
    }
    return value;
  }

  /**
   * This function divides every attribute value in an instance by
   * the instance weight -- useful to find the mean of a cluster in
   * Euclidean space. For a SparseInstance only the stored entries are
   * visited. No attribute is skipped, and the weight is not checked: a zero
   * weight yields non-finite values.
   *
   * @param inst Instance passed in for normalization (destructive update)
   */
  public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
      for (int i = 0; i < inst.numValues(); i++) {
        inst.setValueSparse(i, inst.valueSparse(i) / weight);
      }
    } else {
      for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, inst.value(i) / weight);
      }
    }
  }

  /**
   * Finds the sum of instance sum with instance inst. When there is no
   * running sum yet (sum is null) the result is a copy of inst, of the same
   * sparse or dense kind, attached to m_Instances.
   *
   * @param sum the running sum so far, or null if nothing has been summed yet
   * @param inst the instance to add
   * @param m_Instances the dataset the resulting instance is attached to
   * @return a new instance holding the sum; the arguments are not modified
   * @throws Exception if sum and inst cannot be added (see sumInstances)
   */
  public static Instance sumWithInstance(Instance sum, Instance inst, Instances m_Instances) throws Exception {
    if (sum != null) {
      return sumInstances(sum, inst, m_Instances);
    }
    Instance newSum;
    if (inst instanceof SparseInstance) {
      newSum = new SparseInstance(inst);
    } else {
      newSum = new Instance(inst);
    }
    newSum.setDataset(m_Instances);
    return newSum;
  }

  /**
   * Finds sum of 2 instances (handles sparse and non-sparse). Attribute
   * values are added position by position and the weight of the result is
   * the sum of the two weights.
   *
   * @param inst1 first operand
   * @param inst2 second operand
   * @param m_Instances the dataset the resulting instance is attached to
   * @return a new SparseInstance if both operands are sparse, otherwise a new
   *         dense Instance
   * @throws Exception if the operands differ in number of attributes, or if
   *         one is sparse and the other is not
   */
  public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
      throw new Exception ("Error!! inst1 and inst2 should have same number of attributes.");
    }

    boolean sparse1 = inst1 instanceof SparseInstance;
    boolean sparse2 = inst2 instanceof SparseInstance;
    if (sparse1 != sparse2) {
      throw new Exception ("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }

    double totalWeight = inst1.weight() + inst2.weight();
    double[] values = new double[numAttributes]; // starts out as all zeros
    Instance newInst;

    if (sparse1) {
      // Scatter the stored entries of each operand into the dense buffer.
      for (int i = 0; i < inst1.numValues(); i++) {
        values[inst1.index(i)] = inst1.valueSparse(i);
      }
      for (int i = 0; i < inst2.numValues(); i++) {
        values[inst2.index(i)] += inst2.valueSparse(i);
      }
      newInst = new SparseInstance(totalWeight, values);
    } else {
      for (int i = 0; i < numAttributes; i++) {
        values[i] = inst1.value(i) + inst2.value(i);
      }
      newInst = new Instance(totalWeight, values);
    }

    newInst.setDataset(m_Instances);
    return newInst;
  }

  /**
   * Gets a Double representing the current time, expressed as the number of
   * milliseconds elapsed since the epoch (midnight, January 1, 1970 UTC).
   *
   * @return the current time in milliseconds, as a Double
   */
  public static Double getTimeStamp() {
    // Epoch milliseconds do not depend on any time zone, so no Calendar is needed.
    return Double.valueOf((double) System.currentTimeMillis());
  }
}