SolverMCSVM_CS.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (c) 2007-2014 The LIBLINEAR Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
 * conditions and the following disclaimer in the documentation and/or other materials provided with
 * the distribution.
 *
 * 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or
 * promote products derived from this software without specific prior written permission.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package de.bwaldvogel.liblinear;

import static de.bwaldvogel.liblinear.Linear.copyOf;
import static de.bwaldvogel.liblinear.Linear.info;
import static de.bwaldvogel.liblinear.Linear.swap;


/**
 * A coordinate descent algorithm for multi-class support vector machines by Crammer and Singer
 *
 * <pre>
 * min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
 * s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
 * 
 * where e^m_i = 0 if y_i = m,
 * e^m_i = 1 if y_i != m,
 * C^m_i = C if m = y_i,
 * C^m_i = 0 if m != y_i,
 * and w_m(\alpha) = \sum_i \alpha^m_i x_i
 * 
 * Given:
 * x, y, C
 * eps is the stopping tolerance
 * 
 * solution will be put in w
 * 
 * See Appendix of LIBLINEAR paper, Fan et al. (2008)
 * </pre>
 */
class SolverMCSVM_CS {

	private final double[] B;
	private final double[] C;
	private final double eps;
	private final double[] G;
	private final int max_iter;
	private final int w_size, l;
	private final int nr_class;
	private final Problem prob;

	public SolverMCSVM_CS(Problem prob, int nr_class, double[] C) {
		this(prob, nr_class, C, 0.1);
	}

	public SolverMCSVM_CS(Problem prob, int nr_class, double[] C, double eps) {
		this(prob, nr_class, C, eps, 100000);
	}

	public SolverMCSVM_CS(Problem prob, int nr_class, double[] weighted_C, double eps, int max_iter) {
		this.w_size = prob.n;
		this.l = prob.l;
		this.nr_class = nr_class;
		this.eps = eps;
		this.max_iter = max_iter;
		this.prob = prob;
		this.C = weighted_C;
		this.B = new double[nr_class];
		this.G = new double[nr_class];
	}

	private int GETI(int i) {
		return (int) prob.y[i];
	}

	private boolean be_shrunk(int i, int m, int yi, double alpha_i, double minG) {
		double bound = 0;
		if (m == yi) {
			bound = C[GETI(i)];
		}
		if (alpha_i == bound && G[m] < minG) {
			return true;
		}
		return false;
	}

	public void solve(double[] w) {
		int i, m, s;
		int iter = 0;
		double[] alpha = new double[l * nr_class];
		double[] alpha_new = new double[nr_class];
		int[] index = new int[l];
		double[] QD = new double[l];
		int[] d_ind = new int[nr_class];
		double[] d_val = new double[nr_class];
		int[] alpha_index = new int[nr_class * l];
		int[] y_index = new int[l];
		int active_size = l;
		int[] active_size_i = new int[l];
		double eps_shrink = Math.max(10.0 * eps, 1.0); // stopping tolerance for shrinking
		boolean start_from_all = true;

		// Initial alpha can be set here. Note that
		// sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
		// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
		// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
		// If initial alpha isn't zero, uncomment the for loop below to initialize w
		for (i = 0; i < l * nr_class; i++) {
			alpha[i] = 0;
		}

		for (i = 0; i < w_size * nr_class; i++) {
			w[i] = 0;
		}
		for (i = 0; i < l; i++) {
			for (m = 0; m < nr_class; m++) {
				alpha_index[i * nr_class + m] = m;
			}
			QD[i] = 0;
			for (Feature xi : prob.x[i]) {
				double val = xi.getValue();
				QD[i] += val * val;

				// Uncomment the for loop if initial alpha isn't zero
				// for(m=0; m<nr_class; m++)
				// w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
			}
			active_size_i[i] = nr_class;
			y_index[i] = (int) prob.y[i];
			index[i] = i;
		}

		DoubleArrayPointer alpha_i = new DoubleArrayPointer(alpha, 0);
		IntArrayPointer alpha_index_i = new IntArrayPointer(alpha_index, 0);

		while (iter < max_iter) {
			double stopping = Double.NEGATIVE_INFINITY;

			for (i = 0; i < active_size; i++) {
				// int j = i+rand()%(active_size-i);
				int j = i + Linear.random.nextInt(active_size - i);
				swap(index, i, j);
			}
			for (s = 0; s < active_size; s++) {

				i = index[s];
				double Ai = QD[i];
				// double *alpha_i = &alpha[i*nr_class];
				alpha_i.setOffset(i * nr_class);

				// int *alpha_index_i = &alpha_index[i*nr_class];
				alpha_index_i.setOffset(i * nr_class);

				if (Ai > 0) {
					for (m = 0; m < active_size_i[i]; m++) {
						G[m] = 1;
					}
					if (y_index[i] < active_size_i[i]) {
						G[y_index[i]] = 0;
					}

					for (Feature xi : prob.x[i]) {
						// double *w_i = &w[(xi.index-1)*nr_class];
						int w_offset = (xi.getIndex() - 1) * nr_class;
						for (m = 0; m < active_size_i[i]; m++) {
							// G[m] += w_i[alpha_index_i[m]]*(xi.value);
							G[m] += w[w_offset + alpha_index_i.get(m)] * xi.getValue();
						}

					}

					double minG = Double.POSITIVE_INFINITY;
					double maxG = Double.NEGATIVE_INFINITY;
					for (m = 0; m < active_size_i[i]; m++) {
						if (alpha_i.get(alpha_index_i.get(m)) < 0 && G[m] < minG) {
							minG = G[m];
						}
						if (G[m] > maxG) {
							maxG = G[m];
						}
					}
					if (y_index[i] < active_size_i[i]) {
						if (alpha_i.get((int) prob.y[i]) < C[GETI(i)] && G[y_index[i]] < minG) {
							minG = G[y_index[i]];
						}
					}

					for (m = 0; m < active_size_i[i]; m++) {
						if (be_shrunk(i, m, y_index[i], alpha_i.get(alpha_index_i.get(m)), minG)) {
							active_size_i[i]--;
							while (active_size_i[i] > m) {
								if (!be_shrunk(i, active_size_i[i], y_index[i],
										alpha_i.get(alpha_index_i.get(active_size_i[i])), minG)) {
									swap(alpha_index_i, m, active_size_i[i]);
									swap(G, m, active_size_i[i]);
									if (y_index[i] == active_size_i[i]) {
										y_index[i] = m;
									} else if (y_index[i] == m) {
										y_index[i] = active_size_i[i];
									}
									break;
								}
								active_size_i[i]--;
							}
						}
					}

					if (active_size_i[i] <= 1) {
						active_size--;
						swap(index, s, active_size);
						s--;
						continue;
					}

					if (maxG - minG <= 1e-12) {
						continue;
					} else {
						stopping = Math.max(maxG - minG, stopping);
					}

					for (m = 0; m < active_size_i[i]; m++) {
						B[m] = G[m] - Ai * alpha_i.get(alpha_index_i.get(m));
					}

					solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
					int nz_d = 0;
					for (m = 0; m < active_size_i[i]; m++) {
						double d = alpha_new[m] - alpha_i.get(alpha_index_i.get(m));
						alpha_i.set(alpha_index_i.get(m), alpha_new[m]);
						if (Math.abs(d) >= 1e-12) {
							d_ind[nz_d] = alpha_index_i.get(m);
							d_val[nz_d] = d;
							nz_d++;
						}
					}

					for (Feature xi : prob.x[i]) {
						// double *w_i = &w[(xi->index-1)*nr_class];
						int w_offset = (xi.getIndex() - 1) * nr_class;
						for (m = 0; m < nz_d; m++) {
							w[w_offset + d_ind[m]] += d_val[m] * xi.getValue();
						}
					}
				}
			}

			iter++;

			if (iter % 10 == 0) {
				info(".");
			}

			if (stopping < eps_shrink) {
				if (stopping < eps && start_from_all == true) {
					break;
				} else {
					active_size = l;
					for (i = 0; i < l; i++) {
						active_size_i[i] = nr_class;
					}
					info("*");
					eps_shrink = Math.max(eps_shrink / 2, eps);
					start_from_all = true;
				}
			} else {
				start_from_all = false;
			}
		}

		info("%noptimization finished, #iter = %d%n", iter);
		if (iter >= max_iter) {
			info("%nWARNING: reaching max number of iterations%n");
		}

		// calculate objective value
		double v = 0;
		int nSV = 0;
		for (i = 0; i < w_size * nr_class; i++) {
			v += w[i] * w[i];
		}
		v = 0.5 * v;
		for (i = 0; i < l * nr_class; i++) {
			v += alpha[i];
			if (Math.abs(alpha[i]) > 0) {
				nSV++;
			}
		}
		for (i = 0; i < l; i++) {
			v -= alpha[i * nr_class + (int) prob.y[i]];
		}
		info("Objective value = %f%n", v);
		info("nSV = %d%n", nSV);

	}

	private void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double[] alpha_new) {

		int r;
		assert active_i <= B.length; // no padding
		double[] D = copyOf(B, active_i);
		// clone(D, B, active_i);

		if (yi < active_i) {
			D[yi] += A_i * C_yi;
		}

		// qsort(D, active_i, sizeof(double), compare_double);
		ArraySorter.reversedMergesort(D);

		double beta = D[0] - A_i * C_yi;
		for (r = 1; r < active_i && beta < r * D[r]; r++) {
			beta += D[r];
		}
		beta /= r;

		for (r = 0; r < active_i; r++) {
			if (r == yi) {
				alpha_new[r] = Math.min(C_yi, (beta - B[r]) / A_i);
			} else {
				alpha_new[r] = Math.min(0.0, (beta - B[r]) / A_i);
			}
		}
	}
}