package func.svm;
import util.ABAGAILArrays;
import shared.DataSet;
import shared.Instance;
import shared.Trainer;
/**
* An implementation of the SMO algorithm.
* @author Andrew Guillory gtg008g@mail.gatech.edu
* @version 1.0
*/
public class SingleClassSequentialMinimalOptimization implements Trainer {
/**
* The tolerance value
*/
private static final double TOLERANCE = 1e-8;
/**
* The error value
*/
private static final double EPS = 1e-8;
/**
* An about zero value
*/
private static final double ZERO = 1e-8;
/**
* The number of iterations
*/
private int iterations;
/**
* The instances
*/
private DataSet examples;
/**
* The kernel function
*/
private Kernel kernel;
/**
* The slack value, all langrange multipliers are between
* 0 and 1 / (v * l) where v is examples.size()
*/
private double v;
/**
* For convience v * l
*/
private double vl;
/**
* The weights on the support vectors
*/
private double[] a;
/**
* The threshold subtracted when
* evaluating the support vector machine
*/
private double p;
/**
* Make a new SMO trainer
* @param examples the instances to train on
* @param kernel the kernel to use
* @param v the slack value
*/
public SingleClassSequentialMinimalOptimization(DataSet examples,
Kernel kernel, double v) {
// v can't be bigger than 1
v = Math.min(v, 1);
this.v = v;
this.kernel = kernel;
this.examples = examples;
// set up the kernel
kernel.clear();
kernel.setExamples(examples);
// initialize v * examples.size() of
// the multipliers to be 1 / (v*examples.size())
a = new double[examples.size()];
vl = v * examples.size();
int ivl = (int) vl;
int[] indices = ABAGAILArrays.indices(examples.size());
ABAGAILArrays.permute(indices);
for (int i = 0; i < ivl; i++) {
a[indices[i]] = 1 / vl;
}
// if v * examples.size() isn't an integer, ensure
// that the a values sum to 1
if (ivl != vl) {
double remainder = 1 - (1 / vl) * ivl;
a[indices[ivl]] = remainder;
}
// initialize p to be the max of O_i where a_i > 0
p = output(indices[0]);
for (int i = 1; i < a.length && a[indices[i]] > 0; i++) {
p = Math.max(p, output(indices[i]));
}
}
/**
* @see shared.Trainer#train()
*/
public double train() {
// number of alpha values changed this iteration
int numChanged = 0;
// whether or not to loop through all examples
boolean examineAll = true;
// the main training loop
while (numChanged > 0 | examineAll) {
iterations++;
numChanged = 0;
if (examineAll) {
// loop through all the examples
for (int i = 0; i < a.length; i++) {
if (examine(i)) {
numChanged++;
}
}
} else {
// loop through all non bounded
for (int i = 0; i < a.length; i++) {
if (!isBound(i) && examine(i)) {
numChanged++;
}
}
}
// if we just examined all
// we're either done or can
// go back to only looking at non bounded examples
// else if we didn't change anything
// we should check everything
if (examineAll) {
examineAll = false;
} else if (numChanged == 0) {
examineAll = true;
}
}
// subtract the tolerance from p to make sure
// things at the bound output 0
p -= TOLERANCE;
return 0;
}
/**
* Get the created support vector machine
* @return the support vector machine
*/
public SingleClassSupportVectorMachine getSupportVectorMachine() {
int supportVectorCount = 0;
for (int i = 0; i < a.length; i++) {
if (a[i] != 0) {
supportVectorCount++;
}
}
Instance[] support =
new Instance[supportVectorCount];
double[] supporta = new double[supportVectorCount];
int j = 0;
for (int i = 0; i < a.length; i++) {
if (a[i] != 0) {
support[j] = examples.get(i);
supporta[j] = a[i];
j++;
}
}
DataSet supportSet = new DataSet(support);
supportSet.setDescription(examples.getDescription());
return new SingleClassSupportVectorMachine(supportSet, supporta, kernel, p);
}
/**
* Get the number of iterations performed
* @return the number of iterations
*/
public int getNumberOfIterations() {
return iterations;
}
/**
* Examine an example
* @param i the index of the example to examine
* @return true if the example was changed
*/
private final boolean examine(int j) {
// compute the unthresholded output
double oj = output(j);
// if it doesn't violate the loose KTT conditions
// just return
if (!(((oj - p) * a[j] > TOLERANCE)
|| ((p - oj) * (1/(vl) - a[j]) > TOLERANCE))) {
return false;
}
// look for the max | oj - oi | : ai > 0 & ai < 1 / (vl)
int i = -1; double max = 0;
for (int k = 0; k < a.length; k++) {
double oi = output(k);
if (!isBound(k) && Math.abs(oj - oi) >= max) {
max = Math.abs(oj - oi); i = k;
}
}
// and try and take a optimization step
if (i != -1 && takeStep(i, j, oj)) {
return true;
}
// if the second choice hueristic fails we look
// at all non bound indices, starting from a random point
int startI = (int) Math.random() * a.length;
i = startI;
do {
if (!isBound(i) && takeStep(i, j, oj)) {
return true;
}
i = (i + 1) % a.length;
} while (i != startI);
// if that fails we look at all of the indices, starting from
// a random point
startI = (int) Math.random() * a.length;
i = startI;
do {
if (takeStep(i, j, oj)) {
return true;
}
i = (i + 1) % a.length;
} while (i != startI);
// we have failed to make progress
return false;
}
/**
* Perform the joint optimization on
* two indices
* @param i the first indice
* @param j the second
* @param oj the unthresholded output for the second indice
* @return true if we make progress
*/
private final boolean takeStep(int i, int j, double oj) {
// the indices must be different
if (i == j) {
return false;
}
// the new alpha values being computed
double ai, aj;
// the upper and lower bounds of the line for aj
double l, h;
// the unthresholded output for the first indice
double oi = output(i);
// compute the kernel values
double kii = kernel.value(i, i);
double kij = kernel.value(i, j);
double kjj = kernel.value(j, j);
// calculate the c values, which are the outputs
// minus the contributions from i and j
double ci = oi - a[i] * kii - a[j] * kij;
double cj = oj - a[i] * kij - a[j] * kjj;
// calcualte delta which is 1 - the sum of all ak | k != i & k != j
double d = a[i] + a[j];
// compute the l and h
l = Math.max(0, d - 1 / vl);
h = Math.min(1 / vl, d);
// no progress can be made
if (l == h) {
return false;
}
// calculate the new aj
// unconstrained max
aj = (d * (kii - kij) + ci - cj) / (kii + kjj - 2 * kij);
// clip it
if (aj < l) {
aj = l;
} else if (aj > h) {
aj = h;
}
// make aj zero or 1 / vl if it is close to it
if (aj < ZERO) {
aj = 0;
} else if (aj > 1 / vl - ZERO) {
aj = 1 / vl;
}
// if there's no progress
if (Math.abs(aj - a[j]) < EPS*(aj + a[j] + EPS)) {
return false;
}
// set the ai value
ai = d - aj;
// make ai zero or c if it is close to it
if (ai < ZERO) {
ai = 0;
} else if (ai > 1 / vl - ZERO) {
ai = 1 / vl;
}
// set the a values
a[i] = ai;
a[j] = aj;
// calculate the new threshold and return true
if (!isBound(i)) {
// ai is not bounded
p = output(i);
return true;
}
if (!isBound(j)) {
// aj is not bounded
p = output(j);
return true;
}
// use any other non bound alpha
int startK = (int) Math.random() * a.length;
int k = startK;
do {
if (!isBound(k)) {
p = output(j);
return true;
}
k = (k + 1) % a.length;
} while (k != startK);
// everything is bound use the largest output for a non zero a
p = Double.NEGATIVE_INFINITY;
for (k = 0; k < a.length; k++) {
if (a[k] > 0) {
p = Math.max(p, output(k));
}
}
return true;
}
/**
* Check if an index is bound
* @param i the index to check
* @return true if it is
*/
private final boolean isBound(int i) {
return a[i] <= 0 || a[i] >= 1.0 / (vl);
}
/**
* Evaluate the unthresholded output for an example
* @param i the example to evaluate for
* @return the unthresholded value
*/
private final double output(int i) {
double result = 0;
for (int j = 0; j < a.length; j++) {
if (a[j] != 0) {
result += a[j] * kernel.value(i, j);
}
}
return result;
}
/**
* @see java.lang.Object#toString()
*/
public String toString() {
String ret = "p = " + p + "\n";
ret += "kernel = " + kernel + "\n";
ret += examples.toString();
return ret;
}
}