/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Complexity_Metrics;
import java.util.*;
import java.io.*;
/**
 * This is the main class of the ComplexityMetrics library. It loads a KEEL
 * data set and computes the data-complexity measures (F1-F3, N1-N4, L1-L3,
 * T1-T2) selected in the configuration file.
 *
 * @author Written by Albert Orriols and Nuria Macia (La Salle, Universitat Ramon Llull) 27/05/2010
 * @version 1.1
 * @since JDK1.2
 */
public class ComplexityMetrics {

    /** Training data set loaded through the KEEL dataset API. **/
    private keel.Dataset.InstanceSet dSet;
    /** Number of examples in the data set */
    private int numberOfExamples;
    /** Number of attributes in the data set */
    private int numberOfAttributes;
    /** Examples normalized from the KEEL Data set (index example[instance][attribute]). */
    private double [][] example;
    /** Class of each normalized example */
    private int [] classOfExample;
    /** Number of classes */
    private int numberOfClasses;
    /** Number of examples per class (null until organizePerClass() builds it). */
    private int []numExamplesPerClass;
    /** Examples organized per class (index examplesPerClass[class][instance]). */
    private double [][][]examplesPerClass;
    /** Corresponding position of the instance in the example vector (index indexExamplesPerClass[class][instance]). */
    private int [][]indexExamplesPerClass;
    /** Per-attribute, per-class statistics (mean, variance, min, max) used by F1-F3 and T1. **/
    private Statistics stats;
    /** Training file */
    private String datasetName;
    /** Output file name */
    private String outputFileName;
    /** Result of F1 */
    private double F1;
    /** Result of F2 */
    private double F2;
    /** Result of F3 */
    private double F3;
    /** Result of N1 */
    private double N1;
    /** Result of N2 */
    private double N2;
    /** Result of N3 */
    private double N3;
    /** Result of N4 */
    private double N4;
    /** Result of L1 */
    private double L1;
    /** Result of L2 */
    private double L2;
    /** Result of L3 */
    private double L3;
    /** Result of T1 */
    private double T1;
    /** Result of T2 */
    private double T2;
    /** Random numbers generator */
    private Random rndObject;
    /** Seed of the random object */
    private double seed;
    /** Indicates whether F1 has to be computed */
    private boolean computeF1;
    /** Indicates whether F2 has to be computed */
    private boolean computeF2;
    /** Indicates whether F3 has to be computed */
    private boolean computeF3;
    /** Indicates whether N1 has to be computed */
    private boolean computeN1;
    /** Indicates whether N2 has to be computed */
    private boolean computeN2;
    /** Indicates whether N3 has to be computed */
    private boolean computeN3;
    /** Indicates whether N4 has to be computed */
    private boolean computeN4;
    /** Indicates whether L1 has to be computed */
    private boolean computeL1;
    /** Indicates whether L2 has to be computed */
    private boolean computeL2;
    /** Indicates whether L3 has to be computed */
    private boolean computeL3;
    /** Indicates whether T1 has to be computed */
    private boolean computeT1;
    /** Indicates whether T2 has to be computed */
    private boolean computeT2;
    /** Parameter C for SVM training */
    final double C = 0.05;
    /** Parameter TOLERANCE for SVM training */
    final double TOLERANCE = 0.001;
    /** Parameter EPSILON for SVM training */
    final double EPSILON = 0.001;
/**
 * Builds the metrics object: parses the configuration file, loads the data
 * set it names, and precomputes the per-attribute/per-class statistics.
 *
 * @param configFileName name of the KEEL configuration file
 */
ComplexityMetrics ( String configFileName ) {
    System.out.println ( " > Creating the complexity metrics object with the configuration file: " + configFileName );
    // Initialize parameters
    numExamplesPerClass = null; // marks the per-class grouping as not built yet
    seed = 1;
    rndObject = new Random();
    // All metrics are enabled by default; the config file can turn them off
    computeF1 = computeF2 = computeF3 = computeN1 = computeN2 = computeN3 = computeN4 = true;
    computeL1 = computeL2 = computeL3 = computeT1 = computeT2 = true;
    F1 = F2 = F3 = N1 = N2 = N3 = N4 = L1 = L2 = L3 = T1 = T2 = -1.; // -1 means "not computed"
    //Initializing the names of the output file and the dataset
    outputFileName = datasetName = null;
    // Parse the configuration file (may override seed, file names, and compute* flags)
    parseConfigFile( configFileName );
    rndObject.setSeed ( (long) seed );
    // Create the data set
    dSet = new keel.Dataset.InstanceSet ();
    // Read the data set
    try {
        dSet.readSet ( datasetName, true );
    } catch ( Exception e ) {
        System.out.println ( " > The data set could not be correctly loaded " );
        e.printStackTrace();
    }
    extractDatasetInformation();
    // Make statistics needed by the metrics (means, variances, mins, maxs)
    stats = new Statistics ( dSet, numberOfClasses );
    stats.run ( example, classOfExample, numberOfExamples, numberOfAttributes );
} // end ComplexityMetrics
/**
 * Parses the configuration file and initializes the corresponding members:
 * data set name, output file name, which metrics to run, and the seed.
 * Expected line format is "varName = value"; blank lines are skipped and
 * unknown variable names are silently ignored.
 *
 * FIX: the reader is now closed in a finally block; the original leaked the
 * file handle (it was never closed, not even on the success path).
 *
 * @param configFileName is the name of the configuration file
 */
private void parseConfigFile ( String configFileName ) {
    String line, varName, aux;
    BufferedReader fin = null;
    try {
        System.out.println ( " > Parsing the file: " + configFileName );
        fin = new BufferedReader ( new FileReader(configFileName) );
        line = fin.readLine();
        while ( line != null ) {
            if ( line.length() == 0 ) {
                line = fin.readLine();
                continue;
            }
            StringTokenizer st = new StringTokenizer( line );
            varName = st.nextToken();
            st.nextToken(); // Disregarding '='
            if ( varName.equalsIgnoreCase("algorithm") ) {
                // Do not store the algorithm name
            } else if ( varName.equalsIgnoreCase( "inputdata" ) ) {
                // The value is quoted; strip the surrounding quotes
                aux = st.nextToken();
                datasetName = aux.substring ( 1, aux.length() - 1 );
                System.out.println ( " > Input data set: " + datasetName );
            } else if ( varName.equalsIgnoreCase ( "outputdata" ) ) {
                aux = st.nextToken();
                outputFileName = aux.substring ( 1,aux.length() - 1 );
                System.out.println ( " > Output data set: " + outputFileName );
            } else if ( varName.equalsIgnoreCase ( "runF1" ) ) {
                computeF1 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run F1: " + computeF1 );
            } else if ( varName.equalsIgnoreCase ( "runF2" ) ){
                computeF2 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run F2: " + computeF2 );
            } else if ( varName.equalsIgnoreCase ( "runF3" ) ) {
                computeF3 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run F3: " + computeF3 );
            } else if ( varName.equalsIgnoreCase ( "runN1" ) ) {
                computeN1 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run N1: " + computeN1 );
            } else if ( varName.equalsIgnoreCase ( "runN2" ) ) {
                computeN2 = Boolean.parseBoolean( st.nextToken() );
                System.out.println ( " > Run N2: " + computeN2 );
            } else if ( varName.equalsIgnoreCase ( "runN3" ) ) {
                computeN3 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run N3: " + computeN3 );
            } else if ( varName.equalsIgnoreCase ( "runN4" ) ) {
                computeN4 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run N4: " + computeN4 );
            } else if ( varName.equalsIgnoreCase ( "runL1" ) ) {
                computeL1 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run L1: " + computeL1 );
            } else if ( varName.equalsIgnoreCase ( "runL2" ) ) {
                computeL2 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run L2: " + computeL2 );
            } else if ( varName.equalsIgnoreCase ( "runL3" ) ) {
                computeL3 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run L3: " + computeL3 );
            } else if ( varName.equalsIgnoreCase ( "runT1" ) ) {
                computeT1 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run T1: " + computeT1 );
            } else if ( varName.equalsIgnoreCase ( "runT2" ) ) {
                computeT2 = Boolean.parseBoolean ( st.nextToken() );
                System.out.println ( " > Run T2: " + computeT2 );
            } else if ( varName.equalsIgnoreCase ( "seed" ) ) {
                seed = Double.parseDouble ( st.nextToken() );
                System.out.println ( " > Seed: " + seed );
            }
            // Read next line
            line = fin.readLine();
        }
    } catch ( Exception e ) {
        System.err.println ( "Error reading the configuration file" );
        e.printStackTrace();
    } finally {
        // Always release the file handle, even when parsing fails
        if ( fin != null ) {
            try {
                fin.close();
            } catch ( IOException ignored ) {
                // Nothing sensible to do if close itself fails
            }
        }
    }
} // end configFileName
/**
 * Extract the information from the data set in order to quickly process
 * the complexity measures: copies the normalized input values and the class
 * label of every instance into flat arrays, and derives the class count.
 */
private void extractDatasetInformation(){
    // Getting information about attributes and examples
    numberOfExamples = dSet.getNumInstances();
    numberOfAttributes = keel.Dataset.Attributes.getInputNumAttributes();
    example = new double [ numberOfExamples ][ numberOfAttributes ];
    classOfExample = new int [ numberOfExamples ];
    // Start from 2 (binary problem assumed as minimum); the loop below grows
    // this if higher class labels are observed
    numberOfClasses = 2;
    // Getting all the instances
    for ( int i = 0; i < numberOfExamples; i++) {
        example[i] = dSet.getInstance(i).getNormalizedInputValues();
        // NOTE(review): the class is taken from the first normalized output;
        // assumes a single output attribute holding the class label
        classOfExample[i] = (int) dSet.getInstance(i).getNormalizedOutputValues()[0];
        if ( numberOfClasses < classOfExample[i] + 1 ) {
            numberOfClasses = classOfExample[i] + 1;
        }
    }
    System.out.println ( " \n\n\n " );
    System.out.println ( " > The attributes number (without counting the class attribute) is: " + numberOfAttributes );
    System.out.println (" > The examples number is: " + numberOfExamples );
} // end getInstancesInformation
/**
 * Computes every metric whose flag was enabled in the configuration file,
 * then reports the results to the screen and writes them to the output file.
 */
void run () {
    if ( computeF1 ) runF1();
    if ( computeF2 ) runF2();
    if ( computeF3 ) runF3();
    if ( computeN1 ) runN1();
    if ( computeN2 ) runN2();
    if ( computeN3 ) runN3();
    if ( computeN4 ) runN4();
    if ( computeL1 ) runL1();
    if ( computeL2 ) runL2();
    if ( computeL3 ) runL3();
    if ( computeT1 ) runT1();
    if ( computeT2 ) runT2();
    dumpResultsToScreen();
    writeMetricsToFile();
} // end run
/**
 * Prints to standard output the value of every metric whose computation
 * was enabled in the configuration file.
 */
void dumpResultsToScreen () {
    System.out.println ( " \n\n > Results of the complexity metrics: " );
    printIfEnabled ( computeF1, " F1: " + F1 );
    printIfEnabled ( computeF2, " F2: " + F2 );
    printIfEnabled ( computeF3, " F3: " + F3 );
    printIfEnabled ( computeN1, " N1: " + N1 );
    printIfEnabled ( computeN2, " N2: " + N2 );
    printIfEnabled ( computeN3, " N3: " + N3 );
    printIfEnabled ( computeN4, " N4: " + N4 );
    printIfEnabled ( computeL1, " L1: " + L1 );
    printIfEnabled ( computeL2, " L2: " + L2 );
    printIfEnabled ( computeL3, " L3: " + L3 );
    printIfEnabled ( computeT1, " T1: " + T1 );
    printIfEnabled ( computeT2, " T2: " + T2 );
} // end dumpResultsToScreen

/** Prints the given line only when the corresponding metric was computed. */
private void printIfEnabled ( boolean enabled, String text ) {
    if ( enabled ) System.out.println ( text );
} // end printIfEnabled
/**
 * Writes the value of every enabled metric to the configured output file.
 *
 * FIX: the writer is now closed in a finally block, so the file handle is
 * released even if printing fails (the original leaked it on exception);
 * the exception is also reported via printStackTrace, consistent with the
 * other catch blocks of this class.
 */
void writeMetricsToFile () {
    PrintWriter fout = null;
    try {
        fout = new PrintWriter(new BufferedWriter(new FileWriter( outputFileName )));
        fout.println ( "Find below the results of the complexity metrics: \n" );
        if ( computeF1 ) fout.println ( " F1: " + F1 );
        if ( computeF2 ) fout.println ( " F2: " + F2 );
        if ( computeF3 ) fout.println ( " F3: " + F3 );
        if ( computeN1 ) fout.println ( " N1: " + N1 );
        if ( computeN2 ) fout.println ( " N2: " + N2 );
        if ( computeN3 ) fout.println ( " N3: " + N3 );
        if ( computeN4 ) fout.println ( " N4: " + N4 );
        if ( computeL1 ) fout.println ( " L1: " + L1 );
        if ( computeL2 ) fout.println ( " L2: " + L2 );
        if ( computeL3 ) fout.println ( " L3: " + L3 );
        if ( computeT1 ) fout.println ( " T1: " + T1 );
        if ( computeT2 ) fout.println ( " T2: " + T2 );
    } catch ( Exception e ) {
        System.err.println ( " > [ERROR]: Printing the results to the output file: " + outputFileName );
        e.printStackTrace();
    } finally {
        // PrintWriter.close() never throws; safe to call unconditionally
        if ( fout != null ) {
            fout.close();
        }
    }
} // end writeMetricsToFile
/**
 * F1: maximum Fisher's discriminant ratio over all attributes, computed
 * between classes 0 and 1 only. Higher values mean at least one attribute
 * separates the two classes well.
 */
void runF1() {
    if ( numberOfClasses > 2 ) {
        System.out.println ( " >> [WARNING] This metric is devised for two-class problems. Only the two first classes of the problem will be considered " );
    }
    F1 = Double.MIN_VALUE;
    // Keep the largest per-attribute Fisher's discriminant
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        double meanDiff = stats.getMean( att, 0 ) - stats.getMean( att, 1 );
        double ratio = Math.pow( meanDiff, 2 ) / ( stats.getVariance( att, 0 ) + stats.getVariance( att, 1 ) );
        if ( ratio > F1 ) {
            F1 = ratio;
        }
    }
    System.out.println( " F1: " + F1 ) ;
} // end runF1
/**
 * F2: volume of the overlap region of classes 0 and 1, accumulated as a
 * product of per-attribute overlap ratios. A value near 0 means little
 * overlap between the two class-conditional ranges.
 */
void runF2() {
    if ( numberOfClasses > 2 ) {
        System.out.println ( " >> [WARNING] This metric is devised for two-class problems. Only the two first classes of the problem will be considered " );
    }
    F2 = 1.0;
    // Multiply the normalized overlap width of every attribute
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        double lowestMin = Math.min( stats.getMin( att, 0 ), stats.getMin( att, 1 ) );
        double lowestMax = Math.min( stats.getMax( att, 0 ), stats.getMax( att, 1 ) );
        double highestMin = Math.max( stats.getMin( att, 0 ), stats.getMin( att, 1 ) );
        double highestMax = Math.max( stats.getMax( att, 0 ), stats.getMax( att, 1 ) );
        // (overlap width) / (total width spanned by both classes)
        F2 *= ( lowestMax - highestMin ) / ( highestMax - lowestMin );
    }
    System.out.println( " F2: " + F2 ) ;
} // end runF2
/**
 * F3: maximum (individual) feature efficiency. Iteratively picks the most
 * discriminative remaining attribute, removes the examples that fall outside
 * the class-overlap interval of that attribute, and repeats until no
 * examples or attributes remain. The reported F3 is the fraction of examples
 * discriminated by the single best attribute of the first iteration.
 */
void runF3() {
    int i, j;
    int bestAtt;
    boolean finish = false;
    double overlapMin, overlapMax;
    double[] discPower;
    double[] initialDiscPower; // discPower and initialDiscPower variables store the discriminative power of each attribute
    double[] cumulDiscPower; // Cumulative discriminant power (considering more discriminant variables)
    int[] order; // Maintains the order of the attributes
    boolean[] pointDisc; // Indicates whether the point has been discriminated
    int numAttRemain, numExRemain; // Number of attributes and examples that remain to process
    int mostDiscrAtt = 0;
    double discPowerOfTheBest = 0.0;
    // Metric only applicable to 2-class problems
    if ( numberOfClasses != 2 ) {
        System.out.println ( " > [WARNING in Feature Efficiency] Applying Maximum efficiency to a " + numberOfClasses + "-class data set. " );
    }
    // Organize examples per class
    organizePerClass();
    // Initialize variables
    numAttRemain = numberOfAttributes;
    numExRemain = numberOfExamples;
    initialDiscPower = new double [ numberOfAttributes ];
    discPower = new double [ numberOfAttributes ];
    cumulDiscPower = new double [ numberOfAttributes ];
    order = new int [ numberOfAttributes ];
    for ( i = 0; i < numberOfAttributes; i++ ) {
        discPower[i] = 0;
        initialDiscPower[i] = 0;
        cumulDiscPower[i] = 0;
        order[i] = i;
    }
    pointDisc = new boolean [ numberOfExamples ];
    for ( i = 0; i < numberOfExamples; i++ ) {
        pointDisc[i] = false;
    }
    while ( !finish ) {
        // Get the discriminative power of each of the remaining attributes
        finish = getDiscriminativePowerOfAttributes ( discPower, order, numAttRemain, pointDisc );
        // If we are in the first iteration, we store the initial discriminatory power of each attribute
        if ( numAttRemain == numberOfAttributes ) {
            for ( i = 0; i < numberOfAttributes; i++ ) {
                initialDiscPower[i] = discPower[i];
            }
        }
        // Order the attributes depending on their discriminatory power
        // (ascending: the most discriminative ends up at position numAttRemain-1)
        quickSort ( discPower, order, 0, numAttRemain - 1 );
        // Update the cumulative disciminatory power of the best attribute
        cumulDiscPower[ order[ numAttRemain - 1 ] ] = discPower [ numAttRemain - 1 ];
        // Store the most discriminative attribute if it is the first iteration
        if ( numAttRemain == numberOfAttributes ) {
            // The most discriminative attribute is the last in the vector.
            mostDiscrAtt = order[ numAttRemain - 1 ];
            // F3 value: examples discriminated by the best attribute / total examples
            discPowerOfTheBest = (double) discPower [ numAttRemain - 1 ] / (double) numberOfExamples;
        }
        // Use the first attribute. So, one less attribute remaining
        numAttRemain --;
        // Reset the discriminatory power of unused variables
        for ( i = 0; i < numAttRemain; i++ ) {
            discPower [i] = 0;
        }
        // Check the number of examples that we can discriminate according to the most discriminative attribute.
        bestAtt = order[ numAttRemain ];
        // NOTE(review): overlap interval taken from classes 0 and 1 only,
        // consistent with the two-class warning above
        overlapMin = Math.max ( stats.getMin( bestAtt, 0 ), stats.getMin( bestAtt, 1 ) );
        overlapMax = Math.min ( stats.getMax( bestAtt, 0 ), stats.getMax( bestAtt, 1 ) );
        for ( i = 0; i < numberOfExamples; i++ ) {
            // Examples outside the overlap interval are discriminated by this attribute
            if ( !pointDisc[i] &&
                    ( example[i][ bestAtt ] < overlapMin ||
                      example[i][ bestAtt ] > overlapMax ) ) {
                pointDisc[i] = true;
                numExRemain --;
            }
        }
        if ( numExRemain == 0 || numAttRemain == 0 ) finish = true;
    }
    F3 = discPowerOfTheBest;
    System.out.println ( " F3: " + F3 );
} // end runF3
/**
 * Computes, for each remaining attribute, how many not-yet-discriminated
 * examples fall outside the class-overlap interval of that attribute.
 *
 * @param discPower output: discriminative power, indexed by POSITION j in
 *                  'order' (not by attribute id)
 * @param order current ordering of the attributes; order[j] is the attribute id
 * @param numAttRemain number of attributes still to process (prefix of 'order')
 * @param pointDisc flags of examples already discriminated in earlier rounds
 * @return true if some attribute fully separates the classes (overlap empty)
 */
private boolean getDiscriminativePowerOfAttributes ( double[] discPower, int[] order, int numAttRemain, boolean[] pointDisc ) {
    int i, j, att;
    double overlapMin, overlapMax;
    boolean finish = false;
    for ( j = 0; j < numAttRemain; j++ ) {
        att = order[j];
        // Get the maximum of the minimums, and the minimum of the maximums
        overlapMin = stats.getMin( att, 0 );
        overlapMax = stats.getMax( att, 0 );
        // Loop over classes 1..1: effectively only class 1 is merged in
        // (the metric is devised for two-class problems)
        for ( i = 1; i < 2; i++ ) {
            if ( stats.getMin( att, i ) > overlapMin ) {
                overlapMin = stats.getMin( att, i );
            }
            if ( stats.getMax( att, i ) < overlapMax ) {
                overlapMax = stats.getMax( att, i );
            }
        }
        if ( overlapMin > overlapMax ) { // The attribute completely discriminates all the examples per class.
            discPower[j] = (double) numberOfExamples;
            // Substract all the examples that have been discriminated.
            // NOTE(review): this iterates over all attribute slots, mixing
            // position-indexed values from previous rounds — verify intent
            for ( i = 0; i < numberOfAttributes; i++ ) {
                if ( i != j ) {
                    discPower[j] -= discPower[i];
                }
            }
            finish = true;
        }
        else { // Count the number of examples that are truly discriminated by the attribute.
            for ( i = 0; i < numberOfExamples; i++ ) {
                if ( !pointDisc[i] &&
                        ( example[i][ att ] < overlapMin ||
                          example[i][ att ] > overlapMax ) ) {
                    discPower[j] ++;
                }
            }
        }
    }
    return finish;
} // end getDiscriminativePowerOfAttributes
/**
 * N1: fraction of examples lying near the class boundary, estimated as the
 * proportion of minimum-spanning-tree nodes that are joined by an edge to a
 * node of a different class.
 *
 * FIX: the original allocated and zero-filled a (numberOfExamples-1) x 2
 * matrix that was immediately discarded by the assignment from computePrim();
 * that dead work is removed.
 */
private void runN1() {
    int different = 0;
    // Minimum spanning tree over the examples: rows are edges (node, node)
    int[][] spanTree = computePrim();
    int[] node = new int [ numberOfExamples ];
    for ( int i = 0; i < numberOfExamples; i++ ) {
        node[i] = -1;
    }
    // Store the nodes of the spanning tree with different class
    for ( int i = 0; i < numberOfExamples - 1; i++ ) {
        if ( classOfExample[ spanTree[i][0] ] != classOfExample[ spanTree[i][1] ] ) {
            node[ spanTree[i][0] ] = 0;
            node[ spanTree[i][1] ] = 0;
        }
    }
    // Compute the number of nodes of the spanning tree with different class
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( node[i] == 0 ) different ++;
    }
    N1 = (double) different / (double) numberOfExamples;
    System.out.println( " N1: " + N1 );
} // end runN1
/**
 * Returns the index of the not-yet-treated node (neig[i] != -1) whose edge
 * to the growing spanning tree is shortest, or -1 if none remains.
 */
private int selectMinNotTreated ( int[] neig, double[] edge ) {
    int best = -1;
    double bestDist = Double.MAX_VALUE;
    for ( int i = 0; i < numberOfExamples; i++ ) {
        boolean pending = ( neig[i] != -1 ); // -1 marks already-processed nodes
        if ( pending && edge[i] < bestDist ) {
            bestDist = edge[i];
            best = i;
        }
    }
    return best;
} // end selectMinNotTreated
/**
 * Squared Euclidean distance between two training examples, identified by
 * index (no square root is applied — sufficient for nearest-neighbor ranking).
 */
private double getApproximateDistance ( int ex1, int ex2 ) {
    double sum = 0;
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        double diff = example[ ex1 ][ att ] - example[ ex2 ][ att ];
        sum += Math.pow ( diff, (double)2 );
    }
    return sum;
} // end getApproximateDistance
/**
 * <p>
 * It computes the minimum spanning tree of the dataset using Prim's
 * algorithm over the complete graph of examples, with squared Euclidean
 * distance as edge weight.
 * </p>
 *
 * @return the minimum spanning tree (pairs of connected examples)
 */
private int[][] computePrim () {
    int currentNode;
    int i, j;
    int spanTreeIndex = 0;
    // Spanning tree: pairs of examples (numberOfExamples - 1 edges)
    int[][] spanTree = new int[ numberOfExamples - 1 ][2];
    // Structures to maintain the neighbor closer and the edge to this neighbor
    int[] neig = new int [ numberOfExamples ];
    double[] edge = new double [ numberOfExamples ];
    // Choose a vertex, which will be the seed of the spanning tree: example 0
    currentNode = 0;
    // Initialize the structures considering that we have a complete GRAPH (all nodes connected)
    neig[ currentNode ] = -1; // Indicates that the node has been processed
    edge[ currentNode ] = 0;
    for ( i = 1; i < numberOfExamples; i++ ) {
        neig[i] = currentNode;
        edge[i] = getApproximateDistance ( currentNode, i );
    }
    // Create the minimum spanning tree (MST): add one vertex per iteration
    for ( i = 1; i < numberOfExamples; i++ ) {
        // Select the vertex, not treated yet, with minimum distance
        currentNode = selectMinNotTreated ( neig, edge );
        // Add this vertex to the spanning tree
        spanTree[ spanTreeIndex ][0] = currentNode;
        spanTree[ spanTreeIndex ][1] = neig[ currentNode ];
        spanTreeIndex ++;
        // Check the vertex as processed
        neig[ currentNode ] = -1;
        // Recalculate the distances of nearest neighbors.
        for ( j = 0; j < numberOfExamples; j++ ) {
            if ( neig[j] != -1 && edge[j] > getApproximateDistance ( currentNode, j ) ) {
                neig[j] = currentNode;
                edge[j] = getApproximateDistance ( currentNode, j );
            }
        }
    }
    return spanTree;
} // end computePrim
/**
 * N2: ratio of average intra-class nearest-neighbor distance to average
 * inter-class nearest-neighbor distance. Small values indicate compact
 * classes that are far from each other.
 */
private void runN2() {
    int i, j;
    int neigIntra, neigInter;
    double minDistIntra, minDistInter;
    double distIntraClass, distInterClass;
    double distAux;
    distIntraClass = 0;
    distInterClass = 0;
    for ( i = 0; i < numberOfExamples; i++ ) {
        // Get nearest neighbor intra and inter class
        distAux = 0;
        neigInter = -1;
        neigIntra = -1;
        minDistInter = Double.MAX_VALUE;
        minDistIntra = Double.MAX_VALUE;
        for ( j = 0; j < numberOfExamples; j++ ) {
            if ( j != i ) {
                distAux = getDistance ( i, j );
                if ( classOfExample[j] == classOfExample[i] && distAux < minDistIntra ) {
                    neigIntra = j;
                    minDistIntra = distAux;
                }
                else if ( classOfExample[j] != classOfExample[i] && distAux < minDistInter ) {
                    neigInter = j;
                    minDistInter = distAux;
                }
            }
        }
        // If no neighbor of the required kind exists, contribute 0 to the sum
        if ( neigInter == -1 ) {
            minDistInter = 0;
        }
        if ( neigIntra == -1 ) {
            minDistIntra = 0;
        }
        distIntraClass += minDistIntra;
        distInterClass += minDistInter;
    }
    // Guard against division by zero when all inter-class distances are 0
    if ( distInterClass != 0 ) {
        N2 = distIntraClass / distInterClass ;
        System.out.println ( " N2: " + N2 );
    }
    else {
        System.out.println ( " Error: " ) ;
    }
} // end runN2
/**
 * Returns the index of the nearest neighbor of the given example, or -1 if
 * there is no other example.
 *
 * NOTE: the 'minDist' parameter is ineffective as an out-parameter — Java
 * passes doubles by value, so the caller never observes the assignments made
 * here. It is kept only for signature compatibility.
 *
 * FIX: the distance to each candidate is now computed once per iteration;
 * the original called getDistance() twice for every accepted candidate.
 *
 * @param example index of the query example
 * @param minDist ignored (see note above)
 * @return index of the nearest neighbor, or -1 if none exists
 */
private int getNearestNeighborOfExample ( int example, double minDist ) {
    int neig = -1;
    minDist = Double.MAX_VALUE;
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( i != example ) {
            double dist = getDistance ( example, i );
            if ( dist < minDist ) {
                neig = i;
                minDist = dist;
            }
        }
    }
    return neig;
} // end getNearestNeighborOfExample
/**
 * N3: leave-one-out error rate of the 1-NN classifier on the training set.
 */
void runN3() {
    System.out.println ( "\n\n > Running N3: Testing the KNN with the train instances \n" );
    double leaveOneOutError = runKNN ( 1, example, classOfExample, numberOfExamples, true );
    N3 = leaveOneOutError;
    System.out.println ( " N3: " + N3 );
} // end runN3
/**
 * Classifies the given test examples with the nearest neighbor of the
 * training set and returns the error rate.
 *
 * NOTE: the 'k' parameter is accepted but not used — only the single
 * nearest neighbor is ever considered.
 *
 * @param k number of neighbors (currently ignored; 1-NN is applied)
 * @param testExamples examples to classify
 * @param classOfTestExamples true class of each test example
 * @param numberOfTestExamples number of test examples
 * @param isTrain when true, an example is not matched against itself
 *                (leave-one-out over the training set)
 * @return the fraction of misclassified test examples
 */
double runKNN ( int k, double[][] testExamples, int[] classOfTestExamples, int numberOfTestExamples, boolean isTrain ) {
    int numCorrect = 0;
    for ( int i = 0; i < numberOfTestExamples; i++ ) {
        double bestDist = 10000000;
        int bestIndex = 0;
        for ( int j = 0; j < numberOfExamples; j++ ) {
            boolean skipSelf = ( isTrain && i == j );
            if ( !skipSelf ) {
                double dist = getApproximateDistance ( testExamples[i], example[j] );
                if ( dist < bestDist ) {
                    bestIndex = j;
                    bestDist = dist;
                }
            }
        }
        if ( classOfTestExamples[i] == classOfExample[ bestIndex ] ) {
            numCorrect ++;
        }
    }
    return ( 1 - ( (double)numCorrect / (double)numberOfTestExamples) );
} // end runKNN
/**
 * It returns the approximate (squared Euclidean) distance between 2 examples.
 * No square root is applied, which preserves nearest-neighbor ordering.
 *
 * @param ex1 the first example
 * @param ex2 the second example
 * @return the distance between the 2 examples
 */
double getApproximateDistance ( double[] ex1, double[] ex2 ) {
    double sum = 0;
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        double diff = ex1[ att ] - ex2[ att ];
        sum += Math.pow ( diff, 2.0 );
    }
    return sum;
} // end getApproximateDistance
/**
 * Euclidean distance between two examples given as attribute vectors.
 * Uses Math.sqrt instead of Math.pow(x, 0.5): sqrt is the idiomatic choice
 * and is specified as correctly rounded, whereas pow is only accurate to
 * within 1 ulp.
 *
 * @param ex1 the first example
 * @param ex2 the second example
 * @return the Euclidean distance between the 2 examples
 */
double getDistance ( double[] ex1, double[] ex2 ) {
    return Math.sqrt ( getApproximateDistance( ex1, ex2 ) );
} // end getDistance
/**
 * Euclidean distance between two training examples identified by index.
 * Uses Math.sqrt instead of Math.pow(x, 0.5): sqrt is the idiomatic choice
 * and is specified as correctly rounded, whereas pow is only accurate to
 * within 1 ulp.
 *
 * @param ex1 index of the first example
 * @param ex2 index of the second example
 * @return the Euclidean distance between the 2 examples
 */
double getDistance ( int ex1, int ex2 ){
    return Math.sqrt ( getApproximateDistance ( example[ ex1 ], example[ ex2 ] ) );
} // end getDistance
/**
 * N4: nonlinearity of the 1-NN classifier. Generates synthetic test points
 * by linear interpolation between random pairs of same-class training
 * examples and reports the 1-NN error on them. Requires every class to
 * contain at least one example; otherwise N4 is set to -1.
 */
void runN4() {
    double [][] testExamples;
    int [] classOfTestExamples;
    int cClass;
    int numInstToGeneratePerClass = 1000;
    // 0. Check that all classes have examples.
    organizePerClass ();
    for ( cClass = 0; cClass < numberOfClasses; cClass++ ) {
        if ( numExamplesPerClass[cClass] < 1 ) {
            System.err.println ( " > [ERROR in N4] Error in computing the nonlinearity of the KNN classifier. " );
            System.err.println ( " >> Class " + cClass + " has 0 instances. " );
            N4 = -1; // sentinel: metric could not be computed
            return;
        }
    }
    // 1. Create the convex hull.
    System.out.println ( " > Generating " + numInstToGeneratePerClass + " by means of interpolation " );
    testExamples = new double [ numInstToGeneratePerClass * numberOfClasses ][];
    classOfTestExamples = new int [ numInstToGeneratePerClass * numberOfClasses ];
    createExamplesByInterpolation ( testExamples, classOfTestExamples, numInstToGeneratePerClass, false );
    // 2. Run the KNN algorithm on the interpolated data.
    System.out.println ( " > Testing the KNN with the test instances " );
    N4 = runKNN ( 1, testExamples, classOfTestExamples, numInstToGeneratePerClass * numberOfClasses, false );
    System.out.println ( " N4: "+N4 );
} // end runN4
/**
 * Groups the training examples by class, filling numExamplesPerClass,
 * examplesPerClass and indexExamplesPerClass. Idempotent: if the grouping
 * was already built (numExamplesPerClass != null), it returns immediately.
 */
void organizePerClass () {
    if ( numExamplesPerClass != null ) {
        System.out.println ( " Examples already organized per class ");
        return;
    }
    System.out.println ( " Organizing instances per class " );
    numExamplesPerClass = new int [ numberOfClasses ];
    int []fillPosition = new int [ numberOfClasses ];
    Arrays.fill ( numExamplesPerClass, 0 );
    Arrays.fill ( fillPosition, 0 );
    // First pass: count how many examples belong to each class
    for ( int i = 0; i < numberOfExamples; i++ ) {
        numExamplesPerClass[ classOfExample[i] ] ++;
    }
    // Reserve memory to maintain pointers per class
    examplesPerClass = new double [ numberOfClasses ][][];
    indexExamplesPerClass = new int [ numberOfClasses ][];
    for ( int c = 0; c < numberOfClasses; c++ ) {
        System.out.println ( " Number of instances of class " + c + ": " + numExamplesPerClass[c]);
        examplesPerClass[c] = new double [ numExamplesPerClass[c] ] [];
        indexExamplesPerClass[c] = new int [ numExamplesPerClass[c] ];
    }
    // Second pass: place each example in the slot of its class
    for ( int i = 0; i < numberOfExamples; i++ ) {
        int c = classOfExample[i];
        examplesPerClass [ c ] [ fillPosition[c] ] = example[i];
        indexExamplesPerClass [ c ] [ fillPosition[c] ] = i;
        fillPosition [ c ] ++;
    }
} // end organizePerClass
/**
 * Fills testExamples/classOfTestExamples with synthetic instances generated
 * by random linear interpolation between two examples of the same class.
 *
 * @param testExamples output: generated examples (rows allocated here)
 * @param classOfTestExamples output: class of each generated example
 * @param numExamplesTestPerClass how many instances to generate per class
 * @param isSMO when true, class 0 is relabeled as -1 (SVM convention)
 */
void createExamplesByInterpolation ( double[][] testExamples, int[] classOfTestExamples, int numExamplesTestPerClass, boolean isSMO ) {
    int i, j, cClass, inst = 0, ex1, ex2;
    double rnd;
    // 1. Organize the instances of the training data set per class
    organizePerClass ();
    // 2. Generate the test instances
    for ( cClass = 0; cClass < numberOfClasses; cClass++ ) {
        System.out.println ( " Generating instances of class: " + cClass );
        for ( i = 0; i < numExamplesTestPerClass; i++ ) {
            // 3.1. Allocate memory for one example
            testExamples[inst] = new double [ numberOfAttributes ];
            // 3.2. Select two examples of the class cClass
            // (re-draw until they differ, unless the class has a single example)
            do {
                ex1 = rndObject.nextInt ( numExamplesPerClass[ cClass ] );
                ex2 = rndObject.nextInt ( numExamplesPerClass[ cClass ] );
            }
            while ( ex1 == ex2 && numExamplesPerClass[cClass] > 1 );
            // 3.3. Get the absolute position of example 1 and 2
            ex1 = indexExamplesPerClass[ cClass ][ ex1 ];
            ex2 = indexExamplesPerClass[ cClass ][ ex2 ];
            // 3.4. Create a new instance from these two ones
            // (per-attribute convex combination with a fresh random weight)
            for ( j = 0; j < numberOfAttributes; j++ ) {
                rnd = rndObject.nextDouble();
                testExamples[ inst ][j] = example[ ex1 ][j] * rnd + example[ ex2 ][j] * ( 1 - rnd );
            }
            // 3.5. Set the class
            if ( isSMO && cClass == 0 ) {
                classOfTestExamples[ inst ] = -1;
            }
            else {
                classOfTestExamples[ inst ] = cClass;
            }
            // 3.6. Increment the index inst
            inst ++;
        }
    }
} // end createExamplesByInterpolation
/**
 * T1: fraction of hyperspheres (adherence subsets) needed to cover the data
 * set, following Ho & Basu's pretopology-based measure. Builds per-class
 * adherence subsets sized by the distance to the nearest enemy, removes the
 * subsets strictly included in others, and divides the surviving count by
 * the number of examples.
 */
void runT1 () {
    // 0. Declare temporal variables.
    int i;
    int [][]neigh; // Nearest neighbor to each example.
    double [][]distNeigh; // Distance to the nearest neighbor.
    double globalMinDist; // Global minimum distance between pairs of examples of the same class.
    boolean overlappedExamples = false; // Indicates whether there exist two examples of different class laying
    // in the exact same point of the feature space.
    double epsilon; // Maximum separation.
    int [][]adherenceOrder;
    int []maxAdherenceOrder;
    // 1. Organize instances per class.
    organizePerClass();
    // 2. Initialize variables that contain the neighbors and distances to them.
    neigh = new int [ numberOfClasses ][];
    distNeigh = new double [ numberOfClasses ][];
    adherenceOrder = new int [ numberOfClasses ][];
    maxAdherenceOrder = new int [ numberOfClasses ];
    for ( i = 0; i < numberOfClasses; i++ ) {
        neigh[i] = new int [ numExamplesPerClass[i] ];
        distNeigh[i] = new double [ numExamplesPerClass[i] ];
        adherenceOrder[i] = new int [ numExamplesPerClass[i] ];
    }
    // 3. Search the nearest neighbors for each example.
    // refParameter is used as a 2-slot out-parameter:
    // [0] = global minimum inter-class distance, [1] = overlap flag (+1/-1)
    double []refParameter = new double [2];
    searchNearestNeighborsOfAnotherClass ( neigh, distNeigh, refParameter );
    globalMinDist = refParameter[0];
    overlappedExamples = ( refParameter[1] > 0 );
    // 4. Define the maximum separation permitted, epsilon (0.55 is defined in Tin's original paper)
    epsilon = ( 0.55 * globalMinDist );
    // 5. Search for the adherence subsets
    calculateAdherenceSubsets ( adherenceOrder, maxAdherenceOrder, distNeigh, overlappedExamples, epsilon );
    // 6. Eliminate adherence subsets strictly included in another
    eliminateAdherenceSetsIncluded ( adherenceOrder, maxAdherenceOrder, epsilon );
    // 7. Get statistics for the pretopology metric.
    // valuesReturn[0] holds the number of surviving adherence subsets
    double []valuesReturn = getStatisticsPretopology ( adherenceOrder, maxAdherenceOrder );
    T1 = valuesReturn[0] / numberOfExamples;
    System.out.println ( " T1: " + T1 );
} // end runT1
/**
 * For every example, finds its nearest neighbor belonging to a different
 * class (nearest "enemy") and the distance to it.
 *
 * FIX: the original read refParameter[0] into globalMinDist and then
 * immediately overwrote it with Double.MAX_VALUE; the dead read is removed.
 *
 * @param neigh output: neigh[class][i] = absolute index of the nearest enemy
 * @param distNeigh output: distNeigh[class][i] = distance to that enemy
 * @param refParameter output: [0] = global minimum inter-class distance,
 *                     [1] = +1 if two examples of different class coincide
 *                     exactly (distance 0), -1 otherwise
 */
void searchNearestNeighborsOfAnotherClass ( int[][] neigh, double[][] distNeigh, double[] refParameter ) {
    int cClass, oClass; //Current class (cClass) and opposite class (oClass).
    int i, j;
    double dist;
    double globalMinDist = Double.MAX_VALUE;
    boolean overlappedExamples = false;
    System.out.println ( " > Searching the nearest neighbors of another class " );
    for ( cClass = 0; cClass < numberOfClasses; cClass++ ) {
        // Initialize to the farther possible distance
        for ( i = 0; i < numExamplesPerClass[ cClass ]; i++ ) {
            distNeigh[ cClass ][i] = Double.MAX_VALUE;
        }
        for ( oClass = 0; oClass < numberOfClasses; oClass++ ) {
            if ( oClass != cClass ) {
                for ( i = 0; i < numExamplesPerClass[ cClass ]; i++ ) {
                    for ( j = 0; j < numExamplesPerClass[ oClass ]; j++ ) {
                        dist = getDistance ( indexExamplesPerClass[ cClass ][i], indexExamplesPerClass[ oClass ][j] );
                        if ( dist < distNeigh[ cClass ][i] ) {
                            neigh[ cClass ][i] = indexExamplesPerClass[ oClass ][j];
                            distNeigh[ cClass ][i] = dist;
                        }
                    }
                    // Update the minimum distance between pairs of examples of different classes.
                    if ( distNeigh[ cClass ][i] == 0 ) {
                        overlappedExamples = true;
                    }
                    else if ( globalMinDist > distNeigh[ cClass ][i] ) {
                        globalMinDist = distNeigh[ cClass ][i];
                    }
                }
            }
        }
    }
    refParameter[0] = globalMinDist;
    refParameter[1] = ( overlappedExamples ) ? 1 : -1;
} // end searchNearestNeighborsOfAnotherClass
/**
 * Computes the adherence order of every example from its distance to the
 * nearest enemy, and tracks the maximum order found in each class.
 *
 * @param adherenceOrder output: adherence order per class/example
 * @param maxAdherenceOrder output: maximum order found in each class
 * @param distNeigh distance of each example to its nearest enemy
 * @param overlappedExamples true when examples of different class coincide
 * @param epsilon maximum permitted separation between adherence levels
 */
void calculateAdherenceSubsets ( int[][] adherenceOrder, int[] maxAdherenceOrder, double[][] distNeigh, boolean overlappedExamples, double epsilon ) {
    System.out.println ( " > Calculating adherence subsets " );
    for ( int cClass = 0; cClass < numberOfClasses; cClass++ ) {
        maxAdherenceOrder[ cClass ] = 0;
        for ( int i = 0; i < numExamplesPerClass[cClass]; i++ ) {
            int orderValue;
            if ( overlappedExamples && distNeigh[ cClass ][i] == 0. ) {
                // Two overlapped cases: the adherence order of the example is zero
                orderValue = 0;
            } else {
                // The nearest neighbor is not laying just in the same position
                orderValue = ( int ) ( distNeigh[ cClass ][i] / epsilon ) - 1;
            }
            adherenceOrder[ cClass ][i] = orderValue;
            // Keep the maximum order seen so far in this class
            if ( orderValue > maxAdherenceOrder[ cClass ] ) {
                maxAdherenceOrder[ cClass ] = orderValue;
            }
        }
    }
} // end calculateAdherenceSubsets
/**
 * Removes (marks with -1) every adherence subset that is strictly included
 * in a larger subset of the same class, processing subsets from the highest
 * adherence order downwards.
 *
 * @param adherenceOrder in/out: orders per class/example; subsumed entries
 *                       are set to -1
 * @param maxAdherenceOrder maximum adherence order per class
 * @param epsilon radius increment that separates consecutive orders
 */
void eliminateAdherenceSetsIncluded ( int[][] adherenceOrder, int[] maxAdherenceOrder, double epsilon ) {
    int cClass, i, j;
    int maximum, nextMaximum;
    double difOfOrder, dist;
    System.out.println ( " > Eliminating adherence subsets that are included in others " );
    for ( cClass = 0; cClass < numberOfClasses; cClass++ ) {
        maximum = maxAdherenceOrder[ cClass ];
        // While we have a maximum adherence subset
        while ( maximum >= 0 ) {
            // Search for all the sets with adherence order = maximum, and try to
            // subsume the others to it
            for ( i = 0; i < numExamplesPerClass[ cClass ]; i++ ) {
                if ( adherenceOrder[ cClass ][i] == maximum ) { // Example really far from the boundary.
                    // Eliminate the sets that are stricly included in this set.
                    for ( j = 0; j < numExamplesPerClass[ cClass ]; j++ ) {
                        // Radius difference between the two subsets, in distance units
                        difOfOrder = ( float ) ( adherenceOrder[ cClass ][i] - adherenceOrder[ cClass ][j] ) * epsilon;
                        dist = getDistance ( indexExamplesPerClass[ cClass ][i], indexExamplesPerClass[ cClass ][j] );
                        if ( dist < difOfOrder ) { // The adherence subset j is completely included in i.
                            // So, we remove adherence set since it is included in i.
                            adherenceOrder[ cClass ][j] = -1;
                        }
                    }
                }
            }
            // Now, we search for the following maximum adherence set
            nextMaximum = -1;
            for ( i = 0; i < numExamplesPerClass[ cClass ]; i++ ) {
                if ( adherenceOrder[ cClass ][i] != -1 && adherenceOrder[ cClass ][i] < maximum
                        && adherenceOrder[ cClass ][i] > nextMaximum ) {
                    nextMaximum = adherenceOrder[ cClass ][i];
                }
            }
            // Set to maximum the next maximum adherence set order.
            // (when none remains, nextMaximum stays -1 and the loop ends)
            maximum = nextMaximum;
        }
    }
} // end eliminateAdherenceSetsIncluded
/**
 * Computes summary statistics of the surviving adherence subsets (T1 measure):
 * count, mean and sample standard deviation of their orders, plus the maximum
 * order of class 0 and class 1 (the measure assumes a 2-class data set).
 *
 * @param adherenceOrder    order per [class][example]; -1 marks a removed subset
 * @param maxAdherenceOrder maximum adherence order per class
 * @return { count, mean, stddev, maxOrderClass0, maxOrderClass1 }
 */
double[] getStatisticsPretopology ( int[][] adherenceOrder, int[] maxAdherenceOrder ) {
    // Accumulate in double (the originals used float) to avoid precision loss
    // when summing many squared orders on large data sets.
    double sum = 0, sumsqr = 0, numOrders = 0;
    double[] stats = new double [5];
    for ( int cClass = 0; cClass < numberOfClasses; cClass++ ) {
        for ( int i = 0; i < numExamplesPerClass[ cClass ]; i++ ) {
            if ( adherenceOrder[ cClass ][i] >= 0 ) { // Skip subsets removed as subsumed (-1).
                sum += adherenceOrder[ cClass ][i];
                sumsqr += (double) adherenceOrder[ cClass ][i] * adherenceOrder[ cClass ][i];
                numOrders++;
            }
        }
    }
    // 0. Number of adherence orders
    stats[0] = numOrders;
    // 1. Mean of the order of the adherence sets; guard against an empty set
    //    (the unguarded division previously produced NaN).
    stats[1] = ( numOrders > 0 ) ? sum / numOrders : 0;
    // 2. Sample standard deviation; needs at least two orders
    //    (previously NaN/Infinity when numOrders <= 1).
    stats[2] = ( numOrders > 1 ) ? Math.sqrt ( ( sumsqr - sum * sum / numOrders ) / ( numOrders - 1 ) ) : 0;
    // 3. Maximum order of class 0
    stats[3] = maxAdherenceOrder [0];
    // 4. Maximum order of class 1
    stats[4] = maxAdherenceOrder [1];
    System.out.println (" > Results T1: " + stats[0] + " " + stats[1] + " " + stats[2] + " " + stats[3] + " " + stats[4] );
    return stats;
} // end getStatisticsPretopology
/** Computes T2, the ratio of examples to attributes, and stores it in the T2 field. */
void runT2() {
    T2 = ( (double) numberOfExamples ) / numberOfAttributes;
    System.out.println ( " T2: " + T2 );
} // end runT2
/**
 * Recursive quicksort over vector[inf..sup] that keeps the companion index
 * array {@code order} permuted in step with {@code vector}.
 */
private void quickSort ( double[] vector, int[] order, int inf, int sup ) {
    if ( inf >= sup ) {
        return; // Zero or one element: nothing to sort.
    }
    // Divide and conquer around the pivot's final position.
    int pivot = partition ( vector, order, inf, sup );
    quickSort ( vector, order, inf, pivot - 1 );
    quickSort ( vector, order, pivot + 1, sup );
} // end quickSort
/**
 * Lomuto partition step: vector[inf] is the pivot. Values smaller than the
 * pivot are moved to the front, the pivot is placed at its final sorted slot,
 * and that slot is returned. The {@code order} array mirrors every swap.
 */
private int partition ( double[] vector, int[] order, int inf, int sup ) {
    final double pivotValue = vector[ inf ]; // vector[inf] never moves during the scan.
    int boundary = inf; // Last position known to hold a value < pivot.
    for ( int scan = inf + 1; scan <= sup; scan++ ) {
        if ( vector[ scan ] < pivotValue ) {
            boundary++;
            double dv = vector[ scan ];
            vector[ scan ] = vector[ boundary ];
            vector[ boundary ] = dv;
            int di = order[ scan ];
            order[ scan ] = order[ boundary ];
            order[ boundary ] = di;
        }
    }
    // Drop the pivot between the smaller and the not-smaller partitions.
    vector[ inf ] = vector[ boundary ];
    vector[ boundary ] = pivotValue;
    int tmp = order[ inf ];
    order[ inf ] = order[ boundary ];
    order[ boundary ] = tmp;
    return boundary;
} // end partition
/**
 * Computes L1, the nonlinearity measure based on the sum of distances of the
 * training examples to the objective function of a linear SVM. Only defined
 * for 2-class problems; stores -1 in L1 otherwise.
 */
void runL1 () {
    double[] B = new double [1];
    B[0] = 0;
    if ( numberOfClasses != 2 ) {
        System.out.println ( " > [ERROR in L1] Nonlinearity of the linear classifier can be applied to only 2-class data sets " );
        L1 = -1;
        return;
    }
    System.out.println ( " > Changing classes to -1, 1 " );
    // Train the linear SVM with SMO (it relabels the classes internally and restores them).
    double[] w = trainSMO ( B );
    // Relabel class 0 as -1 so distances are measured against the {-1, 1} targets.
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == 0 ) {
            classOfExample[i] = -1;
        }
    }
    // Sum of distances to the objective function over the training examples.
    System.out.println ( " > Testing SVM with the train instances " );
    L1 = getDistanceObjectiveFunction ( w, B[0], example, classOfExample, numberOfExamples );
    // Revert the relabeling: class -1 back to 0.
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == -1 ) {
            classOfExample[i] = 0;
        }
    }
    System.out.println ( " L1: " + L1 );
} // end runL1
/**
 * Computes L2, the training error of a linear SVM on the original data.
 * Only defined for 2-class problems; stores -1 in L2 otherwise.
 */
void runL2 ( ) {
    double[] B = new double [1];
    B[0] = 0;
    if ( numberOfClasses != 2 ) {
        System.out.println ( " > [ERROR in L2] Nonlinearity of the linear classifier can be applied to only 2-class data sets " );
        L2 = -1;
        return;
    }
    System.out.println ( " > Changing classes to -1, 1 " );
    // Train the linear SVM with SMO (it relabels the classes internally and restores them).
    double[] w = trainSMO ( B );
    // Relabel class 0 as -1 so the error is measured against {-1, 1} targets.
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == 0 ) {
            classOfExample[i] = -1;
        }
    }
    // Error rate of the trained SVM on the training instances themselves.
    System.out.println ( " > Testing SVM with the train instances " );
    L2 = testSMO ( w, B[0], example, classOfExample, numberOfExamples );
    // Revert the relabeling: class -1 back to 0.
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == -1 ) {
            classOfExample[i] = 0;
        }
    }
    System.out.println ( " L2: " + L2 );
} // end runL2
/**
 * Computes L3, the nonlinearity of the linear classifier: trains a linear SVM
 * on the original data and measures its error on examples generated by
 * interpolation. Only defined for 2-class problems; stores -1 in L3 on error
 * (consistent with runL1/runL2 -- previously this branch left L3 unset).
 */
void runL3 () {
    double[] B = new double [1];
    B[0] = 0;
    if ( numberOfClasses != 2 ) {
        System.err.println ( " > [ERROR in L3] Nonlinearity of the linear classifier can be applied to only 2-class data sets. " );
        L3 = -1; // Flag the error in the metric itself, as runL1 and runL2 do.
        return;
    }
    // 0. Check that all classes have examples
    organizePerClass ();
    for ( int cClass = 0; cClass < numberOfClasses; cClass++ ) {
        if ( numExamplesPerClass[ cClass ] < 1 ) {
            System.out.println ( " > [ERROR in L3] Error in computing the nonlinearity of the Linear Classifier. " );
            System.out.println ( " >> Class " + cClass + " has 0 instances. " );
            L3 = -1;
            return;
        }
    }
    // 1. Create new examples by means of interpolation
    int numInstToGeneratePerClass = 1000;
    int numTest = numInstToGeneratePerClass * numberOfClasses;
    System.out.println ( " > Generating " + numInstToGeneratePerClass + " by means of interpolation " );
    double[][] testExamples = new double [ numTest ][];
    int[] classOfTestExamples = new int [ numTest ];
    createExamplesByInterpolation ( testExamples, classOfTestExamples, numInstToGeneratePerClass, false );
    // 2. Change the class 0 to -1 in the generated test sample
    System.out.println ( " > Changing classes to -1, 1 " );
    for ( int i = 0; i < numTest; i++ ) {
        if ( classOfTestExamples[i] == 0 ) {
            classOfTestExamples[i] = -1;
        }
    }
    // 3. Train the support vector machine (trainSMO relabels the training classes internally)
    double[] w = trainSMO ( B );
    // Relabel the training classes to {-1, 1} as well
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == 0 ) {
            classOfExample[i] = -1;
        }
    }
    // 4. Test SMO with the new interpolated examples
    System.out.println ( " > Testing SVM with the test instances " );
    L3 = testSMO ( w, B[0], testExamples, classOfTestExamples, numTest );
    // Revert the relabeling of the training classes: -1 back to 0
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == -1 ) {
            classOfExample[i] = 0;
        }
    }
    System.out.println (" L3: " + L3 );
} // end runL3
//////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// FUNCTIONS TO TRAIN A LINEAR SVM /////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Returns the error rate of the linear model (w, b) on the given sample.
 * An example counts as an error when the sign of the decision function
 * disagrees with the sign of its class label.
 */
double testSMO ( double[] w, double b, double[][] testExamples, int[] classOfTestExamples, int numTestExamples ) {
    int misclassified = 0;
    for ( int i = 0; i < numTestExamples; i++ ) {
        boolean predictedPositive = learnedFunction ( testExamples[i], w, b ) > 0;
        boolean actuallyPositive = classOfTestExamples[i] > 0;
        if ( predictedPositive != actuallyPositive ) {
            misclassified++;
        }
    }
    return misclassified / (double) numTestExamples;
} // end testSMO
/**
 * Returns the mean absolute deviation between the SVM output and the
 * {-1, 1} class targets over the given sample (used by the L1 measure).
 */
double getDistanceObjectiveFunction ( double[] w, double b, double[][] testExamples, int[] classOfTestExamples, int numTestExamples ) {
    double total = 0;
    for ( int i = 0; i < numTestExamples; i++ ) {
        double output = learnedFunction ( testExamples[i], w, b );
        total += Math.abs ( output - classOfTestExamples[i] );
    }
    return total / numTestExamples;
} // end getDistanceObjectiveFunction
/** Linear kernel: dot product between training examples i1 and i2. */
double kernelFunction ( int i1, int i2 ) {
    double result = 0.0;
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        result += example[ i1 ][ att ] * example[ i2 ][ att ];
    }
    return result;
} // end kernelFunction
/** SVM decision value f(x_k) = w . x_k - b for training example k. */
double learnedFunction ( int k, double[] w, double b ) {
    double acc = 0.0;
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        acc += w[ att ] * example[ k ][ att ];
    }
    return acc - b;
} // end learnedFunction
/** SVM decision value f(x) = w . x - b for an arbitrary example vector. */
double learnedFunction ( double[] testExample, double[] w, double b ) {
    double acc = 0.0;
    for ( int att = 0; att < numberOfAttributes; att++ ) {
        acc += w[ att ] * testExample[ att ];
    }
    return acc - b;
} // end learnedFunction
/**
 * One SMO joint optimization step over the pair of Lagrange multipliers
 * alpha[i1], alpha[i2] (Platt's takeStep). Computes the analytically optimal
 * new alpha2, clips it to the box constraints [L, H], derives alpha1, and
 * updates the bias (returned through B[0]), the weight vector w and the
 * error cache accordingly.
 *
 * @param B          one-element array carrying the bias in and out
 * @param alpha      Lagrange multipliers, updated in place on success
 * @param w          weight vector of the linear SVM, updated in place on success
 * @param errorCache cached prediction errors for non-bound multipliers
 * @return 1 if the pair of multipliers was changed, 0 otherwise
 */
int takeStep ( int i1, int i2, double[] B, double[] alpha, double[] w, double[] errorCache ) {
int y1, y2, s;
double alpha1, alpha2; // Old values of alpha1 and alpha2
double a1, a2; // New values of alpha1 and alpha2
double E1, E2, L, H, k11, k22, k12, eta, Lobj, Hobj;
double b = B[0];
// A multiplier cannot be jointly optimized against itself.
if ( i1 == i2 ) {
B[0] = b;
return 0;
}
// Look up alpha1, y1, E1, alpha2, y2, E2.
// The error cache is only valid for non-bound multipliers (0 < alpha < C);
// otherwise the prediction error is recomputed from the current model.
alpha1 = alpha[i1];
y1 = classOfExample[i1];
if ( alpha1 > 0 && alpha1 < C ) {
E1 = errorCache[i1];
}
else {
E1 = learnedFunction ( i1, w, b ) - y1;
}
alpha2 = alpha[i2];
y2 = classOfExample[i2];
if ( alpha2 > 0 && alpha2 < C ) {
E2 = errorCache[i2];
}
else {
E2 = learnedFunction ( i2, w, b ) - y2;
}
s = y1 * y2;
// Compute L, H: box-constraint clipping bounds for the new alpha2,
// which depend on whether the two labels agree.
if ( y1 == y2 ) {
double gamma = alpha1 + alpha2;
if ( gamma > C ) {
L = gamma - C;
H = C;
}
else {
L = 0;
H = gamma;
}
}
else { // y1 != y2
double gamma = alpha1 - alpha2;
if ( gamma > 0 ) {
L = 0;
H = C - gamma;
}
else {
L = -gamma;
H = C;
}
}
// Degenerate box: no room to move alpha2.
if ( L == H ) {
B[0] = b;
return 0;
}
// Compute eta, the (negated) second derivative of the objective along the
// constraint line; eta < 0 is the usual case for a valid kernel.
k11 = kernelFunction ( i1, i1 );
k12 = kernelFunction ( i1, i2 );
k22 = kernelFunction ( i2, i2 );
eta = 2 * k12 - k11 - k22;
if ( eta < 0 ) {
// Unconstrained optimum, then clip to [L, H].
a2 = alpha2 + y2 * ( E2 - E1 ) / eta;
if ( a2 < L ) {
a2 = L;
}
else if ( a2 > H ) {
a2 = H;
}
}
else {
// eta >= 0: objective is not strictly concave along the line;
// evaluate it at both box ends and pick the better one.
// Compute Lobj, Hobj: objective function at a2 = L, a2 = H
double c1 = eta / 2;
double c2 = y2 * ( E1 - E2 ) - eta * alpha2;
Lobj = c1 * L * L + c2 * L;
Hobj = c1 * H * H + c2 * H;
if ( Lobj > Hobj + EPSILON ) {
a2 = L;
}
else if ( Lobj < Hobj - EPSILON ) {
a2 = H;
}
else {
a2 = alpha2;
}
}
// Change too small to matter: report no progress.
if ( Math.abs ( a2 - alpha2 ) < EPSILON * ( a2 + alpha2 + EPSILON ) ) {
B[0] = b;
return 0;
}
// Derive a1 from the linear constraint and clip it into [0, C],
// compensating a2 so the constraint still holds.
a1 = alpha1 - s * ( a2 - alpha2 );
if ( a1 < 0 ) {
a2 += s * a1;
a1 = 0;
}
else if ( a1 > C ) {
double t = a1 - C;
a2 += s * t;
a1 = C;
}
// Update threshold to reflect change in Lagrange multipliers.
// Prefer a bias that makes a non-bound multiplier's KKT condition exact;
// if both are at bounds, average the two candidate biases.
double delta_b, bnew;
if ( a1 > 0 && a1 < C ) {
bnew = b + E1 + y1 * ( a1 - alpha1 ) * k11 + y2 * ( a2 - alpha2 ) * k12;
}
else {
if ( a2 > 0 && a2 < C ) {
bnew = b + E2 + y1 * ( a1 - alpha1 ) * k12 + y2 * ( a2 - alpha2 ) * k22;
}
else {
double b1, b2;
b1 = b + E1 + y1 * ( a1 - alpha1 ) * k11 + y2 * ( a2 - alpha2 ) * k12;
b2 = b + E2 + y1 * ( a1 - alpha1 ) * k12 + y2 * ( a2 - alpha2 ) * k22;
bnew = ( b1 + b2 ) / 2;
}
}
delta_b = bnew - b;
b = bnew;
// Update weight vector to reflect change in a1 and a2 (linear kernel only).
double t1 = y1 * ( a1 - alpha1 );
double t2 = y2 * ( a2 - alpha2 );
for ( int i = 0; i < numberOfAttributes; i++ ) {
w[i] += example[i1][i] * t1 + example[i2][i] * t2;
}
// Update error cache using new Lagrange multipliers.
// (t1 and t2 are recomputed here with the same values as above.)
t1 = y1 * ( a1 - alpha1 );
t2 = y2 * ( a2 - alpha2 );
for ( int i = 0; i < numberOfExamples; i++ ) {
if ( 0 < alpha[i] && alpha[i] < C ) {
errorCache[i] += t1 * kernelFunction ( i1, i ) + t2 * kernelFunction ( i2, i ) - delta_b;
// NOTE(review): Platt's SMO pseudocode zeroes the error cache of i1 and i2
// once, unconditionally, after this loop; here the resets sit inside the
// loop and only run when some non-bound i exists -- confirm this is intended.
errorCache[i1] = 0.0;
errorCache[i2] = 0.0;
}
}
alpha[i1] = a1; // Store a1 in the alpha array
alpha[i2] = a2; // Store a2 in the alpha array
B[0] = b;
return 1;
} // end takeStep
/**
 * Second-choice heuristic: among the non-bound examples (0 < alpha < C),
 * picks the one whose cached error maximizes |E1 - E2| and attempts a joint
 * optimization step with i1. Returns 1 on a successful step, 0 otherwise.
 */
int argmaxE1E2 ( int i1, double E1, double[] B, double[] alpha, double[] w, double[] errorCache ) {
    double b = B[0];
    int best = -1;
    double bestGap = 0;
    for ( int k = 0; k < numberOfExamples; k++ ) {
        if ( alpha[k] > 0 && alpha[k] < C ) {
            double gap = Math.abs ( E1 - errorCache[k] );
            if ( gap > bestGap ) {
                bestGap = gap;
                best = k;
            }
        }
    }
    if ( best >= 0 ) {
        B[0] = b;
        if ( takeStep ( i1, best, B, alpha, w, errorCache ) == 1 ) {
            return 1;
        }
    }
    B[0] = b;
    return 0;
} // end argmaxE1E2
/**
 * Fallback heuristic: sweeps the non-bound examples (0 < alpha < C) starting
 * at a random position and takes the first joint step with i1 that succeeds.
 * Returns 1 on a successful step, 0 otherwise.
 */
int iterateNonBoundExamples ( int i1, double[] B, double[] alpha, double[] w, double[] errorCache ) {
    double b = B[0];
    int start = ( int ) ( rndObject.nextDouble() * numberOfExamples );
    for ( int k = start; k < numberOfExamples + start; k++ ) {
        int i2 = k % numberOfExamples;
        if ( alpha[ i2 ] > 0 && alpha[ i2 ] < C ) {
            B[0] = b;
            if ( takeStep ( i1, i2, B, alpha, w, errorCache ) == 1 ) {
                return 1;
            }
            b = B[0];
        }
    }
    B[0] = b;
    return 0;
} // end iterateNonBoundExamples
/**
 * Last-resort heuristic: sweeps the whole training set from a random start
 * and takes the first joint step with i1 that succeeds.
 * Returns 1 on a successful step, 0 otherwise.
 */
int iterateEntireTrainingSet ( int i1, double[] B, double[] alpha, double[] w, double[] errorCache ) {
    double b = B[0];
    int start = ( int ) ( rndObject.nextDouble() * numberOfExamples );
    for ( int k = start; k < numberOfExamples + start; k++ ) {
        int i2 = k % numberOfExamples;
        B[0] = b;
        if ( takeStep ( i1, i2, B, alpha, w, errorCache ) == 1 ) {
            return 1;
        }
        b = B[0];
    }
    B[0] = b;
    return 0;
} // end iterateEntireTrainingSet
/**
 * Checks whether example i1 violates the KKT conditions (within TOLERANCE)
 * and, if so, tries the SMO second-choice heuristics in order until one joint
 * optimization step succeeds. Returns 1 when a pair of multipliers was
 * updated, 0 otherwise.
 */
int examineExample ( int i1, double[] B, double[] alpha, double[] w, double[] errorCache ) {
    double b = B[0];
    double y1 = classOfExample[ i1 ];
    double alpha1 = alpha[ i1 ];
    // Cached error is only valid for non-bound multipliers; recompute otherwise.
    double E1 = ( alpha1 > 0 && alpha1 < C ) ? errorCache[ i1 ]
                                             : learnedFunction ( i1, w, b ) - y1;
    double r1 = y1 * E1;
    // KKT violation check for example i1.
    if ( ( r1 < - ( TOLERANCE ) && alpha1 < C ) || ( r1 > TOLERANCE && alpha1 > 0 ) ) {
        // i1 violates KKT: look for a partner to optimize jointly.
        // 1. Maximize |E1 - E2| over the non-bound examples.
        if ( argmaxE1E2 ( i1, E1, B, alpha, w, errorCache ) == 1 ) {
            return 1;
        }
        // 2. Sweep the non-bound examples from a random start.
        if ( iterateNonBoundExamples ( i1, B, alpha, w, errorCache ) == 1 ) {
            return 1;
        }
        // 3. Sweep the entire training set from a random start.
        if ( iterateEntireTrainingSet ( i1, B, alpha, w, errorCache ) == 1 ) {
            return 1;
        }
        b = B[0];
    }
    B[0] = b;
    return 0;
} // end examineExample
/**
 * Trains a linear SVM with Platt's Sequential Minimal Optimization.
 * Temporarily relabels class 0 as -1 (and restores it before returning),
 * then alternates full sweeps of the training set with sweeps over the
 * non-bound multipliers until no multiplier changes, capped by an
 * iteration limit.
 *
 * @param B one-element array carrying the bias in and out
 * @return the learned weight vector
 */
double[] trainSMO ( double[] B ) {
    // Java arrays are zero-initialized: alpha, errorCache and w all start at 0,
    // matching an initial model that predicts 0 for every example.
    double[] alpha = new double [ numberOfExamples ];      // Lagrange multipliers
    double[] errorCache = new double [ numberOfExamples ];
    double[] w = new double [ numberOfAttributes ];        // Weight vector
    System.out.println ( " > Changing classes to -1, 1 " );
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == 0 ) {
            classOfExample[i] = -1;
        }
    }
    System.out.println ( " > Building the Support Vector Machine [progress line] " );
    // Cap the number of outer sweeps so degenerate data cannot loop forever.
    int maxIterations = ( numberOfExamples < 25000 ) ? 100000 : 4 * numberOfExamples;
    int numChanged = 0;
    boolean examineAll = true;
    for ( int iter = 0; ( numChanged > 0 || examineAll ) && iter < maxIterations; iter++ ) {
        System.out.print ( "." );
        numChanged = 0;
        for ( int k = 0; k < numberOfExamples; k++ ) {
            // Full sweep, or restricted to the non-bound multipliers.
            if ( examineAll || ( alpha[k] > 0 && alpha[k] < C ) ) {
                numChanged += examineExample ( k, B, alpha, w, errorCache );
            }
        }
        if ( examineAll ) {
            examineAll = false;
        }
        else if ( numChanged == 0 ) {
            // Non-bound sweep made no progress: fall back to a full sweep.
            examineAll = true;
        }
    }
    System.out.println ( " > Changing classes to 0, 1 " );
    for ( int i = 0; i < numberOfExamples; i++ ) {
        if ( classOfExample[i] == -1 ) {
            classOfExample[i] = 0;
        }
    }
    return w;
} // end trainSMO
//////////////////////////////////////////////////////////////////////////////////
/**
* It runs the algorithm
*
* @param args the command line arguments
*/
static public void main ( String [] args ) {
    // Guard against a missing argument: previously args[0] threw a raw
    // ArrayIndexOutOfBoundsException with no usage hint.
    if ( args.length < 1 ) {
        System.err.println ( " > [ERROR] A configuration file must be passed as the first argument " );
        return;
    }
    System.out.println ( " > Starting running the complexity metrics " );
    System.out.println ( " > Config File: " + args[0] );
    ComplexityMetrics cm = new ComplexityMetrics ( args[0] );
    cm.run();
} // end main
} // end ComplexityMetrics