/** * Global Sensor Networks (GSN) Source Code * Copyright (c) 2006-2016, Ecole Polytechnique Federale de Lausanne (EPFL) * * This file is part of GSN. * * GSN is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * GSN is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GSN. If not, see <http://www.gnu.org/licenses/>. * * File: src/ch/epfl/gsn/utils/models/helper/Tools.java * * @author Julien Eberle * @author Sofiane Sarni * */ package ch.epfl.gsn.utils.models.helper; import weka.classifiers.Classifier; import weka.classifiers.functions.LibSVM; import weka.classifiers.functions.LinearRegression; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; import weka.core.SelectedTag; /** * some useful tools for working with the results of a classifier * */ public class Tools { /** * get the list of classification errors for each instance in the dataset * @param c the classifier * @param i the dataset * @return the list of errors * @throws Exception */ public static double[] get_errors(Classifier c, Instances i) throws Exception{ double[] computed = new double[i.numInstances()]; for(int m = 0;m<computed.length;m++){ double s = c.classifyInstance(i.instance(m)); double r = i.instance(m).value(i.classAttribute()); computed[m] = (r-s)*(r-s); } return computed; } /** * get the average error of the classifier over the given dataset * @param c the classifier * @param i the dataset * @return the average error * @throws Exception */ public static double get_avg_error(Classifier c, Instances i) throws Exception{ double computed = 0; for(int m = 0;m<i.numInstances();m++){ double s = c.classifyInstance(i.instance(m)); double r = i.instance(m).value(i.classAttribute()); computed += (r-s)*(r-s); } return computed/i.numInstances(); } /** * add a new feature in the dataset containing the predicted values by the classifier * @param c the classifier * @param i the dataset * @throws Exception */ public static void add_predictions(Classifier c, Instances i) throws Exception{ double[] computed = new double[i.numInstances()]; for(int m = 0;m<computed.length;m++){ computed[m] = c.classifyInstance(i.instance(m)); } Attribute a = new Attribute("interpolate"); int num = i.numAttributes(); i.insertAttributeAt(a, num); for(int m = 0;m<computed.length;m++){ i.instance(m).setValue(num, computed[m]); } } /** * get a classifier by its name. * This function can be used to set the parameter of the classifiers * @param name * @return */ public static Classifier getClassifierById(int id){ Classifier c = null; if(id == 0){ LibSVM sv = new LibSVM(); sv.setSVMType(new SelectedTag(LibSVM.SVMTYPE_EPSILON_SVR,LibSVM.TAGS_SVMTYPE)); sv.setCost(Math.pow(2, 2)); sv.setGamma(Math.pow(2, 1)); sv.setEps(0.00001); c=sv; } else if(id == 1){ c = new LinearRegression(); } return c; } /** * pre-process the data be normalizing and removing unused attributes * @param i * @return */ public static Instances prepareInstances(Instances i){ //select features to use i.setClassIndex(9); i.deleteAttributeAt(8); i.deleteAttributeAt(7); i.deleteAttributeAt(6); i.deleteAttributeAt(2); i.deleteAttributeAt(1); //scale the values for(int k=0;k<i.numInstances();k++){ Instance j = i.instance(k); j.setValue(0, j.value(0)/1400.0); j.setValue(2, j.value(2)/50); j.setValue(3, j.value(3)/100.0); j.setValue(4, j.value(4)/100.0 - 4); } return i; } }