/** * Copyright (c) 2007-2014 The LIBLINEAR Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this list of conditions * and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, this list of * conditions and the following disclaimer in the documentation and/or other materials provided with * the distribution. * * 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or * promote products derived from this software without specific prior written permission. * * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package de.bwaldvogel.liblinear; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.Closeable; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.Reader; import java.io.Writer; import java.nio.charset.Charset; import java.util.Formatter; import java.util.Locale; import java.util.Random; import java.util.regex.Pattern; /** * <h2>Java port of <a href="http://www.csie.ntu.edu.tw/~cjlin/liblinear/">liblinear</a></h2> * * <p> * The usage should be pretty similar to the C version of <tt>liblinear</tt>. * </p> * <p> * Please consider reading the <tt>README</tt> file of <tt>liblinear</tt>. * </p> * * <p> * <em>The port was done by Benedikt Waldvogel (mail at bwaldvogel.de)</em> * </p> * * @version 1.94 */ public class Linear { static final Charset FILE_CHARSET = Charset.forName("ISO-8859-1"); static final Locale DEFAULT_LOCALE = Locale.ENGLISH; private static Object OUTPUT_MUTEX = new Object(); private static PrintStream DEBUG_OUTPUT = System.out; private static final long DEFAULT_RANDOM_SEED = 0L; static Random random = new Random(DEFAULT_RANDOM_SEED); /** * @param target * predicted classes */ public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target) { int i; int l = prob.l; int[] perm = new int[l]; if (nr_fold > l) { nr_fold = l; System.err .println("WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)"); } int[] fold_start = new int[nr_fold + 1]; for (i = 0; i < l; i++) { perm[i] = i; } for (i = 0; i < l; i++) { int j = i + random.nextInt(l - i); swap(perm, i, j); } for (i = 0; i <= nr_fold; i++) { fold_start[i] = i * l / nr_fold; } for (i = 0; i < nr_fold; i++) { int begin = fold_start[i]; int end = fold_start[i + 1]; int j, k; Problem subprob = new Problem(); subprob.bias = prob.bias; subprob.n = prob.n; subprob.l = l - (end - begin); subprob.x = new Feature[subprob.l][]; subprob.y = new double[subprob.l]; k = 0; for (j = 0; j < begin; j++) { subprob.x[k] = prob.x[perm[j]]; subprob.y[k] = prob.y[perm[j]]; ++k; } for (j = end; j < l; j++) { subprob.x[k] = prob.x[perm[j]]; subprob.y[k] = prob.y[perm[j]]; ++k; } Model submodel = train(subprob, param); for (j = begin; j < end; j++) { target[perm[j]] = predict(submodel, prob.x[perm[j]]); } } } /** used as complex return type */ private static class GroupClassesReturn { final int[] count; final int[] label; final int nr_class; final int[] start; GroupClassesReturn(int nr_class, int[] label, int[] start, int[] count) { this.nr_class = nr_class; this.label = label; this.start = start; this.count = count; } } private static GroupClassesReturn groupClasses(Problem prob, int[] perm) { int l = prob.l; int max_nr_class = 16; int nr_class = 0; int[] label = new int[max_nr_class]; int[] count = new int[max_nr_class]; int[] data_label = new int[l]; int i; for (i = 0; i < l; i++) { int this_label = (int) prob.y[i]; int j; for (j = 0; j < nr_class; j++) { if (this_label == label[j]) { ++count[j]; break; } } data_label[i] = j; if (j == nr_class) { if (nr_class == max_nr_class) { max_nr_class *= 2; label = copyOf(label, max_nr_class); count = copyOf(count, max_nr_class); } label[nr_class] = this_label; count[nr_class] = 1; ++nr_class; } } // // Labels are ordered by their first occurrence in the training set. // However, for two-class sets with -1/+1 labels and -1 appears first, // we swap labels to ensure that internally the binary SVM has positive data corresponding // to the +1 instances. // if (nr_class == 2 && label[0] == -1 && label[1] == 1) { swap(label, 0, 1); swap(count, 0, 1); for (i = 0; i < l; i++) { if (data_label[i] == 0) { data_label[i] = 1; } else { data_label[i] = 0; } } } int[] start = new int[nr_class]; start[0] = 0; for (i = 1; i < nr_class; i++) { start[i] = start[i - 1] + count[i - 1]; } for (i = 0; i < l; i++) { perm[start[data_label[i]]] = i; ++start[data_label[i]]; } start[0] = 0; for (i = 1; i < nr_class; i++) { start[i] = start[i - 1] + count[i - 1]; } return new GroupClassesReturn(nr_class, label, start, count); } static void info(String message) { synchronized (OUTPUT_MUTEX) { if (DEBUG_OUTPUT == null) { return; } DEBUG_OUTPUT.printf(message); DEBUG_OUTPUT.flush(); } } static void info(String format, Object... args) { synchronized (OUTPUT_MUTEX) { if (DEBUG_OUTPUT == null) { return; } DEBUG_OUTPUT.printf(format, args); DEBUG_OUTPUT.flush(); } } /** * @param s * the string to parse for the double value * @throws IllegalArgumentException * if s is empty or represents NaN or Infinity * @throws NumberFormatException * see {@link Double#parseDouble(String)} */ static double atof(String s) { if (s == null || s.length() < 1) { throw new IllegalArgumentException("Can't convert empty string to integer"); } double d = Double.parseDouble(s); if (Double.isNaN(d) || Double.isInfinite(d)) { throw new IllegalArgumentException("NaN or Infinity in input: " + s); } return d; } /** * @param s * the string to parse for the integer value * @throws IllegalArgumentException * if s is empty * @throws NumberFormatException * see {@link Integer#parseInt(String)} */ static int atoi(String s) throws NumberFormatException { if (s == null || s.length() < 1) { throw new IllegalArgumentException("Can't convert empty string to integer"); } // Integer.parseInt doesn't accept '+' prefixed strings if (s.charAt(0) == '+') { s = s.substring(1); } return Integer.parseInt(s); } /** * Java5 'backport' of Arrays.copyOf */ public static double[] copyOf(double[] original, int newLength) { double[] copy = new double[newLength]; System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength)); return copy; } /** * Java5 'backport' of Arrays.copyOf */ public static int[] copyOf(int[] original, int newLength) { int[] copy = new int[newLength]; System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength)); return copy; } /** * Loads the model from inputReader. It uses {@link java.util.Locale#ENGLISH} for number * formatting. * * <p> * Note: The inputReader is <b>NOT closed</b> after reading or in case of an exception. * </p> */ public static Model loadModel(Reader inputReader) throws IOException { Model model = new Model(); model.label = null; Pattern whitespace = Pattern.compile("\\s+"); BufferedReader reader = null; if (inputReader instanceof BufferedReader) { reader = (BufferedReader) inputReader; } else { reader = new BufferedReader(inputReader); } String line = null; while ((line = reader.readLine()) != null) { String[] split = whitespace.split(line); if (split[0].equals("solver_type")) { SolverType solver = SolverType.valueOf(split[1]); if (solver == null) { throw new RuntimeException("unknown solver type"); } model.solverType = solver; } else if (split[0].equals("nr_class")) { model.nr_class = atoi(split[1]); Integer.parseInt(split[1]); } else if (split[0].equals("nr_feature")) { model.nr_feature = atoi(split[1]); } else if (split[0].equals("bias")) { model.bias = atof(split[1]); } else if (split[0].equals("w")) { break; } else if (split[0].equals("label")) { model.label = new int[model.nr_class]; for (int i = 0; i < model.nr_class; i++) { model.label[i] = atoi(split[i + 1]); } } else { throw new RuntimeException("unknown text in model file: [" + line + "]"); } } int w_size = model.nr_feature; if (model.bias >= 0) { w_size++; } int nr_w = model.nr_class; if (model.nr_class == 2 && model.solverType != SolverType.MCSVM_CS) { nr_w = 1; } model.w = new double[w_size * nr_w]; int[] buffer = new int[128]; for (int i = 0; i < w_size; i++) { for (int j = 0; j < nr_w; j++) { int b = 0; while (true) { int ch = reader.read(); if (ch == -1) { throw new EOFException("unexpected EOF"); } if (ch == ' ') { model.w[i * nr_w + j] = atof(new String(buffer, 0, b)); break; } else { buffer[b++] = ch; } } } } return model; } /** * Loads the model from the file with ISO-8859-1 charset. It uses * {@link java.util.Locale#ENGLISH} for number formatting. */ public static Model loadModel(File modelFile) throws IOException { BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(modelFile), FILE_CHARSET)); try { return loadModel(inputReader); } finally { inputReader.close(); } } static void closeQuietly(Closeable c) { if (c == null) { return; } try { c.close(); } catch (Throwable t) { } } public static double predict(Model model, Feature[] x) { double[] dec_values = new double[model.nr_class]; return predictValues(model, x, dec_values); } /** * @throws IllegalArgumentException * if model is not probabilistic (see {@link Model#isProbabilityModel()}) */ public static double predictProbability(Model model, Feature[] x, double[] prob_estimates) throws IllegalArgumentException { if (!model.isProbabilityModel()) { StringBuilder sb = new StringBuilder("probability output is only supported for logistic regression"); sb.append(". This is currently only supported by the following solvers: "); int i = 0; for (SolverType solverType : SolverType.values()) { if (solverType.isLogisticRegressionSolver()) { if (i++ > 0) { sb.append(", "); } sb.append(solverType.name()); } } throw new IllegalArgumentException(sb.toString()); } int nr_class = model.nr_class; int nr_w; if (nr_class == 2) { nr_w = 1; } else { nr_w = nr_class; } double label = predictValues(model, x, prob_estimates); for (int i = 0; i < nr_w; i++) { prob_estimates[i] = 1 / (1 + Math.exp(-prob_estimates[i])); } if (nr_class == 2) { prob_estimates[1] = 1. - prob_estimates[0]; } else { double sum = 0; for (int i = 0; i < nr_class; i++) { sum += prob_estimates[i]; } for (int i = 0; i < nr_class; i++) { prob_estimates[i] = prob_estimates[i] / sum; } } return label; } public static double predictValues(Model model, Feature[] x, double[] dec_values) { int n; if (model.bias >= 0) { n = model.nr_feature + 1; } else { n = model.nr_feature; } double[] w = model.w; int nr_w; if (model.nr_class == 2 && model.solverType != SolverType.MCSVM_CS) { nr_w = 1; } else { nr_w = model.nr_class; } for (int i = 0; i < nr_w; i++) { dec_values[i] = 0; } for (Feature lx : x) { int idx = lx.getIndex(); // the dimension of testing data may exceed that of training if (idx <= n) { for (int i = 0; i < nr_w; i++) { dec_values[i] += w[(idx - 1) * nr_w + i] * lx.getValue(); } } } if (model.nr_class == 2) { if (model.solverType.isSupportVectorRegression()) { return dec_values[0]; } else { return dec_values[0] > 0 ? model.label[0] : model.label[1]; } } else { int dec_max_idx = 0; for (int i = 1; i < model.nr_class; i++) { if (dec_values[i] > dec_values[dec_max_idx]) { dec_max_idx = i; } } return model.label[dec_max_idx]; } } static void printf(Formatter formatter, String format, Object... args) throws IOException { formatter.format(format, args); IOException ioException = formatter.ioException(); if (ioException != null) { throw ioException; } } /** * Writes the model to the modelOutput. It uses {@link java.util.Locale#ENGLISH} for number * formatting. * * <p> * <b>Note: The modelOutput is closed after reading or in case of an exception.</b> * </p> */ public static void saveModel(Writer modelOutput, Model model) throws IOException { int nr_feature = model.nr_feature; int w_size = nr_feature; if (model.bias >= 0) { w_size++; } int nr_w = model.nr_class; if (model.nr_class == 2 && model.solverType != SolverType.MCSVM_CS) { nr_w = 1; } Formatter formatter = new Formatter(modelOutput, DEFAULT_LOCALE); try { printf(formatter, "solver_type %s\n", model.solverType.name()); printf(formatter, "nr_class %d\n", model.nr_class); if (model.label != null) { printf(formatter, "label"); for (int i = 0; i < model.nr_class; i++) { printf(formatter, " %d", model.label[i]); } printf(formatter, "\n"); } printf(formatter, "nr_feature %d\n", nr_feature); printf(formatter, "bias %.16g\n", model.bias); printf(formatter, "w\n"); for (int i = 0; i < w_size; i++) { for (int j = 0; j < nr_w; j++) { double value = model.w[i * nr_w + j]; /** this optimization is the reason for {@link Model#equals(double[], double[])} */ if (value == 0.0) { printf(formatter, "%d ", 0); } else { printf(formatter, "%.16g ", value); } } printf(formatter, "\n"); } formatter.flush(); IOException ioException = formatter.ioException(); if (ioException != null) { throw ioException; } } finally { formatter.close(); } } /** * Writes the model to the file with ISO-8859-1 charset. It uses * {@link java.util.Locale#ENGLISH} for number formatting. */ public static void saveModel(File modelFile, Model model) throws IOException { BufferedWriter modelOutput = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(modelFile), FILE_CHARSET)); saveModel(modelOutput, model); } /* * this method corresponds to the following define in the C version: #define GETI(i) (y[i]+1) */ private static int GETI(byte[] y, int i) { return y[i] + 1; } /** * A coordinate descent algorithm for L1-loss and L2-loss SVM dual problems * * <pre> * min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, * s.t. 0 <= \alpha_i <= upper_bound_i, * * where Qij = yi yj xi^T xj and * D is a diagonal matrix * * In L1-SVM case: * upper_bound_i = Cp if y_i = 1 * upper_bound_i = Cn if y_i = -1 * D_ii = 0 * In L2-SVM case: * upper_bound_i = INF * D_ii = 1/(2*Cp) if y_i = 1 * D_ii = 1/(2*Cn) if y_i = -1 * * Given: * x, y, Cp, Cn * eps is the stopping tolerance * * solution will be put in w * * See Algorithm 3 of Hsieh et al., ICML 2008 * </pre> */ private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, double Cp, double Cn, SolverType solver_type) { int l = prob.l; int w_size = prob.n; int i, s, iter = 0; double C, d, G; double[] QD = new double[l]; int max_iter = 1000; int[] index = new int[l]; double[] alpha = new double[l]; byte[] y = new byte[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = Double.POSITIVE_INFINITY; double PGmin_old = Double.NEGATIVE_INFINITY; double PGmax_new, PGmin_new; // default solver_type: L2R_L2LOSS_SVC_DUAL double diag[] = new double[] { 0.5 / Cn, 0, 0.5 / Cp }; double upper_bound[] = new double[] { Double.POSITIVE_INFINITY, 0, Double.POSITIVE_INFINITY }; if (solver_type == SolverType.L2R_L1LOSS_SVC_DUAL) { diag[0] = 0; diag[2] = 0; upper_bound[0] = Cn; upper_bound[2] = Cp; } for (i = 0; i < l; i++) { if (prob.y[i] > 0) { y[i] = +1; } else { y[i] = -1; } } // Initial alpha can be set here. Note that // 0 <= alpha[i] <= upper_bound[GETI(i)] for (i = 0; i < l; i++) { alpha[i] = 0; } for (i = 0; i < w_size; i++) { w[i] = 0; } for (i = 0; i < l; i++) { QD[i] = diag[GETI(y, i)]; for (Feature xi : prob.x[i]) { double val = xi.getValue(); QD[i] += val * val; w[xi.getIndex() - 1] += y[i] * alpha[i] * val; } index[i] = i; } while (iter < max_iter) { PGmax_new = Double.NEGATIVE_INFINITY; PGmin_new = Double.POSITIVE_INFINITY; for (i = 0; i < active_size; i++) { int j = i + random.nextInt(active_size - i); swap(index, i, j); } for (s = 0; s < active_size; s++) { i = index[s]; G = 0; byte yi = y[i]; for (Feature xi : prob.x[i]) { G += w[xi.getIndex() - 1] * xi.getValue(); } G = G * yi - 1; C = upper_bound[GETI(y, i)]; G += alpha[i] * diag[GETI(y, i)]; PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index, s, active_size); s--; continue; } else if (G < 0) { PG = G; } } else if (alpha[i] == C) { if (G < PGmin_old) { active_size--; swap(index, s, active_size); s--; continue; } else if (G > 0) { PG = G; } } else { PG = G; } PGmax_new = Math.max(PGmax_new, PG); PGmin_new = Math.min(PGmin_new, PG); if (Math.abs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = Math.min(Math.max(alpha[i] - G / QD[i], 0.0), C); d = (alpha[i] - alpha_old) * yi; for (Feature xi : prob.x[i]) { w[xi.getIndex() - 1] += d * xi.getValue(); } } } iter++; if (iter % 10 == 0) { info("."); } if (PGmax_new - PGmin_new <= eps) { if (active_size == l) { break; } else { active_size = l; info("*"); PGmax_old = Double.POSITIVE_INFINITY; PGmin_old = Double.NEGATIVE_INFINITY; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) { PGmax_old = Double.POSITIVE_INFINITY; } if (PGmin_old >= 0) { PGmin_old = Double.NEGATIVE_INFINITY; } } info("%noptimization finished, #iter = %d%n", iter); if (iter >= max_iter) { info("%nWARNING: reaching max number of iterations%nUsing -s 2 may be faster (also see FAQ)%n%n"); } // calculate objective value double v = 0; int nSV = 0; for (i = 0; i < w_size; i++) { v += w[i] * w[i]; } for (i = 0; i < l; i++) { v += alpha[i] * (alpha[i] * diag[GETI(y, i)] - 2); if (alpha[i] > 0) { ++nSV; } } info("Objective value = %g%n", v / 2); info("nSV = %d%n", nSV); } // To support weights for instances, use GETI(i) (i) private static int GETI_SVR(int i) { return 0; } /** * A coordinate descent algorithm for L1-loss and L2-loss epsilon-SVR dual problem * * min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l * yi\beta_i, s.t. -upper_bound_i <= \beta_i <= upper_bound_i, * * where Qij = xi^T xj and D is a diagonal matrix * * In L1-SVM case: upper_bound_i = C lambda_i = 0 In L2-SVM case: upper_bound_i = INF lambda_i = * 1/(2*C) * * Given: x, y, p, C eps is the stopping tolerance * * solution will be put in w * * See Algorithm 4 of Ho and Lin, 2012 */ private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param) { int l = prob.l; double C = param.C; double p = param.p; int w_size = prob.n; double eps = param.eps; int i, s, iter = 0; int max_iter = 1000; int active_size = l; int[] index = new int[l]; double d, G, H; double Gmax_old = Double.POSITIVE_INFINITY; double Gmax_new, Gnorm1_new; double Gnorm1_init = 0; // initialize to 0 to get rid of Eclipse warning/error double[] beta = new double[l]; double[] QD = new double[l]; double[] y = prob.y; // L2R_L2LOSS_SVR_DUAL double[] lambda = new double[] { 0.5 / C }; double[] upper_bound = new double[] { Double.POSITIVE_INFINITY }; if (param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL) { lambda[0] = 0; upper_bound[0] = C; } // Initial beta can be set here. Note that // -upper_bound <= beta[i] <= upper_bound for (i = 0; i < l; i++) { beta[i] = 0; } for (i = 0; i < w_size; i++) { w[i] = 0; } for (i = 0; i < l; i++) { QD[i] = 0; for (Feature xi : prob.x[i]) { double val = xi.getValue(); QD[i] += val * val; w[xi.getIndex() - 1] += beta[i] * val; } index[i] = i; } while (iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for (i = 0; i < active_size; i++) { int j = i + random.nextInt(active_size - i); swap(index, i, j); } for (s = 0; s < active_size; s++) { i = index[s]; G = -y[i] + lambda[GETI_SVR(i)] * beta[i]; H = QD[i] + lambda[GETI_SVR(i)]; for (Feature xi : prob.x[i]) { int ind = xi.getIndex() - 1; double val = xi.getValue(); G += val * w[ind]; } double Gp = G + p; double Gn = G - p; double violation = 0; if (beta[i] == 0) { if (Gp < 0) { violation = -Gp; } else if (Gn > 0) { violation = Gn; } else if (Gp > Gmax_old && Gn < -Gmax_old) { active_size--; swap(index, s, active_size); s--; continue; } } else if (beta[i] >= upper_bound[GETI_SVR(i)]) { if (Gp > 0) { violation = Gp; } else if (Gp < -Gmax_old) { active_size--; swap(index, s, active_size); s--; continue; } } else if (beta[i] <= -upper_bound[GETI_SVR(i)]) { if (Gn < 0) { violation = -Gn; } else if (Gn > Gmax_old) { active_size--; swap(index, s, active_size); s--; continue; } } else if (beta[i] > 0) { violation = Math.abs(Gp); } else { violation = Math.abs(Gn); } Gmax_new = Math.max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if (Gp < H * beta[i]) { d = -Gp / H; } else if (Gn > H * beta[i]) { d = -Gn / H; } else { d = -beta[i]; } if (Math.abs(d) < 1.0e-12) { continue; } double beta_old = beta[i]; beta[i] = Math.min(Math.max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]); d = beta[i] - beta_old; if (d != 0) { for (Feature xi : prob.x[i]) { w[xi.getIndex() - 1] += d * xi.getValue(); } } } if (iter == 0) { Gnorm1_init = Gnorm1_new; } iter++; if (iter % 10 == 0) { info("."); } if (Gnorm1_new <= eps * Gnorm1_init) { if (active_size == l) { break; } else { active_size = l; info("*"); Gmax_old = Double.POSITIVE_INFINITY; continue; } } Gmax_old = Gmax_new; } info("%noptimization finished, #iter = %d%n", iter); if (iter >= max_iter) { info("%nWARNING: reaching max number of iterations%nUsing -s 11 may be faster%n%n"); } // calculate objective value double v = 0; int nSV = 0; for (i = 0; i < w_size; i++) { v += w[i] * w[i]; } v = 0.5 * v; for (i = 0; i < l; i++) { v += p * Math.abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i]; if (beta[i] != 0) { nSV++; } } info("Objective value = %g%n", v); info("nSV = %d%n", nSV); } /** * A coordinate descent algorithm for the dual of L2-regularized logistic regression problems * * <pre> * min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i) , * s.t. 0 <= \alpha_i <= upper_bound_i, * * where Qij = yi yj xi^T xj and * upper_bound_i = Cp if y_i = 1 * upper_bound_i = Cn if y_i = -1 * * Given: * x, y, Cp, Cn * eps is the stopping tolerance * * solution will be put in w * * See Algorithm 5 of Yu et al., MLJ 2010 * </pre> * * @since 1.7 */ private static void solve_l2r_lr_dual(Problem prob, double w[], double eps, double Cp, double Cn) { int l = prob.l; int w_size = prob.n; int i, s, iter = 0; double xTx[] = new double[l]; int max_iter = 1000; int index[] = new int[l]; double alpha[] = new double[2 * l]; // store alpha and C - alpha byte y[] = new byte[l]; int max_inner_iter = 100; // for inner Newton double innereps = 1e-2; double innereps_min = Math.min(1e-8, eps); double upper_bound[] = new double[] { Cn, 0, Cp }; for (i = 0; i < l; i++) { if (prob.y[i] > 0) { y[i] = +1; } else { y[i] = -1; } } // Initial alpha can be set here. Note that // 0 < alpha[i] < upper_bound[GETI(i)] // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] for (i = 0; i < l; i++) { alpha[2 * i] = Math.min(0.001 * upper_bound[GETI(y, i)], 1e-8); alpha[2 * i + 1] = upper_bound[GETI(y, i)] - alpha[2 * i]; } for (i = 0; i < w_size; i++) { w[i] = 0; } for (i = 0; i < l; i++) { xTx[i] = 0; for (Feature xi : prob.x[i]) { double val = xi.getValue(); xTx[i] += val * val; w[xi.getIndex() - 1] += y[i] * alpha[2 * i] * val; } index[i] = i; } while (iter < max_iter) { for (i = 0; i < l; i++) { int j = i + random.nextInt(l - i); swap(index, i, j); } int newton_iter = 0; double Gmax = 0; for (s = 0; s < l; s++) { i = index[s]; byte yi = y[i]; double C = upper_bound[GETI(y, i)]; double ywTx = 0, xisq = xTx[i]; for (Feature xi : prob.x[i]) { ywTx += w[xi.getIndex() - 1] * xi.getValue(); } ywTx *= y[i]; double a = xisq, b = ywTx; // Decide to minimize g_1(z) or g_2(z) int ind1 = 2 * i, ind2 = 2 * i + 1, sign = 1; if (0.5 * a * (alpha[ind2] - alpha[ind1]) + b < 0) { ind1 = 2 * i + 1; ind2 = 2 * i; sign = -1; } // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) double alpha_old = alpha[ind1]; double z = alpha_old; if (C - z < 0.5 * C) { z = 0.1 * z; } double gp = a * (z - alpha_old) + sign * b + Math.log(z / (C - z)); Gmax = Math.max(Gmax, Math.abs(gp)); // Newton method on the sub-problem final double eta = 0.1; // xi in the paper int inner_iter = 0; while (inner_iter <= max_inner_iter) { if (Math.abs(gp) < innereps) { break; } double gpp = a + C / (C - z) / z; double tmpz = z - gp / gpp; if (tmpz <= 0) { z *= eta; } else { // tmpz in (0, C) z = tmpz; } gp = a * (z - alpha_old) + sign * b + Math.log(z / (C - z)); newton_iter++; inner_iter++; } if (inner_iter > 0) // update w { alpha[ind1] = z; alpha[ind2] = C - z; for (Feature xi : prob.x[i]) { w[xi.getIndex() - 1] += sign * (z - alpha_old) * yi * xi.getValue(); } } } iter++; if (iter % 10 == 0) { info("."); } if (Gmax < eps) { break; } if (newton_iter <= l / 10) { innereps = Math.max(innereps_min, 0.1 * innereps); } } info("%noptimization finished, #iter = %d%n", iter); if (iter >= max_iter) { info("%nWARNING: reaching max number of iterations%nUsing -s 0 may be faster (also see FAQ)%n%n"); } // calculate objective value double v = 0; for (i = 0; i < w_size; i++) { v += w[i] * w[i]; } v *= 0.5; for (i = 0; i < l; i++) { v += alpha[2 * i] * Math.log(alpha[2 * i]) + alpha[2 * i + 1] * Math.log(alpha[2 * i + 1]) - upper_bound[GETI(y, i)] * Math.log(upper_bound[GETI(y, i)]); } info("Objective value = %g%n", v); } /** * A coordinate descent algorithm for L1-regularized L2-loss support vector classification * * <pre> * min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2, * * Given: * x, y, Cp, Cn * eps is the stopping tolerance * * solution will be put in w * * See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008) * </pre> * * @since 1.5 */ private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, double Cp, double Cn) { int l = prob_col.l; int w_size = prob_col.n; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; double sigma = 0.01; double d, G_loss, G, H; double Gmax_old = Double.POSITIVE_INFINITY; double Gmax_new, Gnorm1_new; double Gnorm1_init = 0; // eclipse moans this variable might not be initialized double d_old, d_diff; double loss_old = 0; // eclipse moans this variable might not be initialized double loss_new; double appxcond, cond; int[] index = new int[w_size]; byte[] y = new byte[l]; double[] b = new double[l]; // b = 1-ywTx double[] xj_sq = new double[w_size]; double[] C = new double[] { Cn, 0, Cp }; // Initial w can be set here. for (j = 0; j < w_size; j++) { w[j] = 0; } for (j = 0; j < l; j++) { b[j] = 1; if (prob_col.y[j] > 0) { y[j] = 1; } else { y[j] = -1; } } for (j = 0; j < w_size; j++) { index[j] = j; xj_sq[j] = 0; for (Feature xi : prob_col.x[j]) { int ind = xi.getIndex() - 1; xi.setValue(xi.getValue() * y[ind]); // x->value stores yi*xij double val = xi.getValue(); b[ind] -= w[j] * val; xj_sq[j] += C[GETI(y, ind)] * val * val; } } while (iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for (j = 0; j < active_size; j++) { int i = j + random.nextInt(active_size - j); swap(index, i, j); } for (s = 0; s < active_size; s++) { j = index[s]; G_loss = 0; H = 0; for (Feature xi : prob_col.x[j]) { int ind = xi.getIndex() - 1; if (b[ind] > 0) { double val = xi.getValue(); double tmp = C[GETI(y, ind)] * val; G_loss -= tmp * b[ind]; H += tmp * val; } } G_loss *= 2; G = G_loss; H *= 2; H = Math.max(H, 1e-12); double Gp = G + 1; double Gn = G - 1; double violation = 0; if (w[j] == 0) { if (Gp < 0) { violation = -Gp; } else if (Gn > 0) { violation = Gn; } else if (Gp > Gmax_old / l && Gn < -Gmax_old / l) { active_size--; swap(index, s, active_size); s--; continue; } } else if (w[j] > 0) { violation = Math.abs(Gp); } else { violation = Math.abs(Gn); } Gmax_new = Math.max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if (Gp < H * w[j]) { d = -Gp / H; } else if (Gn > H * w[j]) { d = -Gn / H; } else { d = -w[j]; } if (Math.abs(d) < 1.0e-12) { continue; } double delta = Math.abs(w[j] + d) - Math.abs(w[j]) + G * d; d_old = 0; int num_linesearch; for (num_linesearch = 0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; cond = Math.abs(w[j] + d) - Math.abs(w[j]) - sigma * delta; appxcond = xj_sq[j] * d * d + G_loss * d + cond; if (appxcond <= 0) { for (Feature x : prob_col.x[j]) { b[x.getIndex() - 1] += d_diff * x.getValue(); } break; } if (num_linesearch == 0) { loss_old = 0; loss_new = 0; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; if (b[ind] > 0) { loss_old += C[GETI(y, ind)] * b[ind] * b[ind]; } double b_new = b[ind] + d_diff * x.getValue(); b[ind] = b_new; if (b_new > 0) { loss_new += C[GETI(y, ind)] * b_new * b_new; } } } else { loss_new = 0; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; double b_new = b[ind] + d_diff * x.getValue(); b[ind] = b_new; if (b_new > 0) { loss_new += C[GETI(y, ind)] * b_new * b_new; } } } cond = cond + loss_new - loss_old; if (cond <= 0) { break; } else { d_old = d; d *= 0.5; delta *= 0.5; } } w[j] += d; // recompute b[] if line search takes too many steps if (num_linesearch >= max_num_linesearch) { info("#"); for (int i = 0; i < l; i++) { b[i] = 1; } for (int i = 0; i < w_size; i++) { if (w[i] == 0) { continue; } for (Feature x : prob_col.x[i]) { b[x.getIndex() - 1] -= w[i] * x.getValue(); } } } } if (iter == 0) { Gnorm1_init = Gnorm1_new; } iter++; if (iter % 10 == 0) { info("."); } if (Gmax_new <= eps * Gnorm1_init) { if (active_size == w_size) { break; } else { active_size = w_size; info("*"); Gmax_old = Double.POSITIVE_INFINITY; continue; } } Gmax_old = Gmax_new; } info("%noptimization finished, #iter = %d%n", iter); if (iter >= max_iter) { info("%nWARNING: reaching max number of iterations%n"); } // calculate objective value double v = 0; int nnz = 0; for (j = 0; j < w_size; j++) { for (Feature x : prob_col.x[j]) { x.setValue(x.getValue() * prob_col.y[x.getIndex() - 1]); // restore x->value } if (w[j] != 0) { v += Math.abs(w[j]); nnz++; } } for (j = 0; j < l; j++) { if (b[j] > 0) { v += C[GETI(y, j)] * b[j] * b[j]; } } info("Objective value = %g%n", v); info("#nonzeros/#features = %d/%d%n", nnz, w_size); } /** * A coordinate descent algorithm for L1-regularized logistic regression problems * * <pre> * min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), * * Given: * x, y, Cp, Cn * eps is the stopping tolerance * * solution will be put in w * * See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) * </pre> * * @since 1.5 */ private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, double Cp, double Cn) { int l = prob_col.l; int w_size = prob_col.n; int j, s, newton_iter = 0, iter = 0; int max_newton_iter = 100; int max_iter = 1000; int max_num_linesearch = 20; int active_size; int QP_active_size; double nu = 1e-12; double inner_eps = 1; double sigma = 0.01; double w_norm, w_norm_new; double z, G, H; double Gnorm1_init = 0; // eclipse moans this variable might not be initialized double Gmax_old = Double.POSITIVE_INFINITY; double Gmax_new, Gnorm1_new; double QP_Gmax_old = Double.POSITIVE_INFINITY; double QP_Gmax_new, QP_Gnorm1_new; double delta, negsum_xTd, cond; int[] index = new int[w_size]; byte[] y = new byte[l]; double[] Hdiag = new double[w_size]; double[] Grad = new double[w_size]; double[] wpd = new double[w_size]; double[] xjneg_sum = new double[w_size]; double[] xTd = new double[l]; double[] exp_wTx = new double[l]; double[] exp_wTx_new = new double[l]; double[] tau = new double[l]; double[] D = new double[l]; double[] C = { Cn, 0, Cp }; // Initial w can be set here. for (j = 0; j < w_size; j++) { w[j] = 0; } for (j = 0; j < l; j++) { if (prob_col.y[j] > 0) { y[j] = 1; } else { y[j] = -1; } exp_wTx[j] = 0; } w_norm = 0; for (j = 0; j < w_size; j++) { w_norm += Math.abs(w[j]); wpd[j] = w[j]; index[j] = j; xjneg_sum[j] = 0; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; double val = x.getValue(); exp_wTx[ind] += w[j] * val; if (y[ind] == -1) { xjneg_sum[j] += C[GETI(y, ind)] * val; } } } for (j = 0; j < l; j++) { exp_wTx[j] = Math.exp(exp_wTx[j]); double tau_tmp = 1 / (1 + exp_wTx[j]); tau[j] = C[GETI(y, j)] * tau_tmp; D[j] = C[GETI(y, j)] * exp_wTx[j] * tau_tmp * tau_tmp; } while (newton_iter < max_newton_iter) { Gmax_new = 0; Gnorm1_new = 0; active_size = w_size; for (s = 0; s < active_size; s++) { j = index[s]; Hdiag[j] = nu; Grad[j] = 0; double tmp = 0; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; Hdiag[j] += x.getValue() * x.getValue() * D[ind]; tmp += x.getValue() * tau[ind]; } Grad[j] = -tmp + xjneg_sum[j]; double Gp = Grad[j] + 1; double Gn = Grad[j] - 1; double violation = 0; if (w[j] == 0) { if (Gp < 0) { violation = -Gp; } else if (Gn > 0) { violation = Gn; } else if (Gp > Gmax_old / l && Gn < -Gmax_old / l) { active_size--; swap(index, s, active_size); s--; continue; } } else if (w[j] > 0) { violation = Math.abs(Gp); } else { violation = Math.abs(Gn); } Gmax_new = Math.max(Gmax_new, violation); Gnorm1_new += violation; } if (newton_iter == 0) { Gnorm1_init = Gnorm1_new; } if (Gnorm1_new <= eps * Gnorm1_init) { break; } iter = 0; QP_Gmax_old = Double.POSITIVE_INFINITY; QP_active_size = active_size; for (int i = 0; i < l; i++) { xTd[i] = 0; } // optimize QP over wpd while (iter < max_iter) { QP_Gmax_new = 0; QP_Gnorm1_new = 0; for (j = 0; j < QP_active_size; j++) { int i = random.nextInt(QP_active_size - j); swap(index, i, j); } for (s = 0; s < QP_active_size; s++) { j = index[s]; H = Hdiag[j]; G = Grad[j] + (wpd[j] - w[j]) * nu; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; G += x.getValue() * D[ind] * xTd[ind]; } double Gp = G + 1; double Gn = G - 1; double violation = 0; if (wpd[j] == 0) { if (Gp < 0) { violation = -Gp; } else if (Gn > 0) { violation = Gn; } else if (Gp > QP_Gmax_old / l && Gn < -QP_Gmax_old / l) { QP_active_size--; swap(index, s, QP_active_size); s--; continue; } } else if (wpd[j] > 0) { violation = Math.abs(Gp); } else { violation = Math.abs(Gn); } QP_Gmax_new = Math.max(QP_Gmax_new, violation); QP_Gnorm1_new += violation; // obtain solution of one-variable problem if (Gp < H * wpd[j]) { z = -Gp / H; } else if (Gn > H * wpd[j]) { z = -Gn / H; } else { z = -wpd[j]; } if (Math.abs(z) < 1.0e-12) { continue; } z = Math.min(Math.max(z, -10.0), 10.0); wpd[j] += z; for (Feature x : prob_col.x[j]) { int ind = x.getIndex() - 1; xTd[ind] += x.getValue() * z; } } iter++; if (QP_Gnorm1_new <= inner_eps * Gnorm1_init) { // inner stopping if (QP_active_size == active_size) { break; } else { QP_active_size = active_size; QP_Gmax_old = Double.POSITIVE_INFINITY; continue; } } QP_Gmax_old = QP_Gmax_new; } if (iter >= max_iter) { info("WARNING: reaching max number of inner iterations%n"); } delta = 0; w_norm_new = 0; for (j = 0; j < w_size; j++) { delta += Grad[j] * (wpd[j] - w[j]); if (wpd[j] != 0) { w_norm_new += Math.abs(wpd[j]); } } delta += w_norm_new - w_norm; negsum_xTd = 0; for (int i = 0; i < l; i++) { if (y[i] == -1) { negsum_xTd += C[GETI(y, i)] * xTd[i]; } } int num_linesearch; for (num_linesearch = 0; num_linesearch < max_num_linesearch; num_linesearch++) { cond = w_norm_new - w_norm + negsum_xTd - sigma * delta; for (int i = 0; i < l; i++) { double exp_xTd = Math.exp(xTd[i]); exp_wTx_new[i] = exp_wTx[i] * exp_xTd; cond += C[GETI(y, i)] * Math.log((1 + exp_wTx_new[i]) / (exp_xTd + exp_wTx_new[i])); } if (cond <= 0) { w_norm = w_norm_new; for (j = 0; j < w_size; j++) { w[j] = wpd[j]; } for (int i = 0; i < l; i++) { exp_wTx[i] = exp_wTx_new[i]; double tau_tmp = 1 / (1 + exp_wTx[i]); tau[i] = C[GETI(y, i)] * tau_tmp; D[i] = C[GETI(y, i)] * exp_wTx[i] * tau_tmp * tau_tmp; } break; } else { w_norm_new = 0; for (j = 0; j < w_size; j++) { wpd[j] = (w[j] + wpd[j]) * 0.5; if (wpd[j] != 0) { w_norm_new += Math.abs(wpd[j]); } } delta *= 0.5; negsum_xTd *= 0.5; for (int i = 0; i < l; i++) { xTd[i] *= 0.5; } } } // Recompute some info due to too many line search steps if (num_linesearch >= max_num_linesearch) { for (int i = 0; i < l; i++) { exp_wTx[i] = 0; } for (int i = 0; i < w_size; i++) { if (w[i] == 0) { continue; } for (Feature x : prob_col.x[i]) { exp_wTx[x.getIndex() - 1] += w[i] * x.getValue(); } } for (int i = 0; i < l; i++) { exp_wTx[i] = Math.exp(exp_wTx[i]); } } if (iter == 1) { inner_eps *= 0.25; } newton_iter++; Gmax_old = Gmax_new; info("iter %3d #CD cycles %d%n", newton_iter, iter); } info("=========================%n"); info("optimization finished, #iter = %d%n", newton_iter); if (newton_iter >= max_newton_iter) { info("WARNING: reaching max number of iterations%n"); } // calculate objective value double v = 0; int nnz = 0; for (j = 0; j < w_size; j++) { if (w[j] != 0) { v += Math.abs(w[j]); nnz++; } } for (j = 0; j < l; j++) { if (y[j] == 1) { v += C[GETI(y, j)] * Math.log(1 + 1 / exp_wTx[j]); } else { v += C[GETI(y, j)] * Math.log(1 + exp_wTx[j]); } } info("Objective value = %g%n", v); info("#nonzeros/#features = %d/%d%n", nnz, w_size); } // transpose matrix X from row format to column format static Problem transpose(Problem prob) { int l = prob.l; int n = prob.n; int[] col_ptr = new int[n + 1]; Problem prob_col = new Problem(); prob_col.l = l; prob_col.n = n; prob_col.y = new double[l]; prob_col.x = new Feature[n][]; for (int i = 0; i < l; i++) { prob_col.y[i] = prob.y[i]; } for (int i = 0; i < l; i++) { for (Feature x : prob.x[i]) { col_ptr[x.getIndex()]++; } } for (int i = 0; i < n; i++) { prob_col.x[i] = new Feature[col_ptr[i + 1]]; col_ptr[i] = 0; // reuse the array to count the nr of elements } for (int i = 0; i < l; i++) { for (int j = 0; j < prob.x[i].length; j++) { Feature x = prob.x[i][j]; int index = x.getIndex() - 1; prob_col.x[index][col_ptr[index]] = new FeatureNode(i + 1, x.getValue()); col_ptr[index]++; } } return prob_col; } static void swap(double[] array, int idxA, int idxB) { double temp = array[idxA]; array[idxA] = array[idxB]; array[idxB] = temp; } static void swap(int[] array, int idxA, int idxB) { int temp = array[idxA]; array[idxA] = array[idxB]; array[idxB] = temp; } static void swap(IntArrayPointer array, int idxA, int idxB) { int temp = array.get(idxA); array.set(idxA, array.get(idxB)); array.set(idxB, temp); } /** * @throws IllegalArgumentException * if the feature nodes of prob are not sorted in ascending order */ public static Model train(Problem prob, Parameter param) { if (prob == null) { throw new IllegalArgumentException("problem must not be null"); } if (param == null) { throw new IllegalArgumentException("parameter must not be null"); } if (prob.n == 0) { throw new IllegalArgumentException("problem has zero features"); } if (prob.l == 0) { throw new IllegalArgumentException("problem has zero instances"); } for (Feature[] nodes : prob.x) { int indexBefore = 0; for (Feature n : nodes) { if (n.getIndex() <= indexBefore) { throw new IllegalArgumentException("feature nodes must be sorted by index in ascending order"); } indexBefore = n.getIndex(); } } int l = prob.l; int n = prob.n; int w_size = prob.n; Model model = new Model(); if (prob.bias >= 0) { model.nr_feature = n - 1; } else { model.nr_feature = n; } model.solverType = param.solverType; model.bias = prob.bias; if (param.solverType == SolverType.L2R_L2LOSS_SVR || // param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL || // param.solverType == SolverType.L2R_L2LOSS_SVR_DUAL) { model.w = new double[w_size]; model.nr_class = 2; model.label = null; checkProblemSize(n, model.nr_class); train_one(prob, param, model.w, 0, 0); } else { int[] perm = new int[l]; // group training data of the same class GroupClassesReturn rv = groupClasses(prob, perm); int nr_class = rv.nr_class; int[] label = rv.label; int[] start = rv.start; int[] count = rv.count; checkProblemSize(n, nr_class); model.nr_class = nr_class; model.label = new int[nr_class]; for (int i = 0; i < nr_class; i++) { model.label[i] = label[i]; } // calculate weighted C double[] weighted_C = new double[nr_class]; for (int i = 0; i < nr_class; i++) { weighted_C[i] = param.C; } for (int i = 0; i < param.getNumWeights(); i++) { int j; for (j = 0; j < nr_class; j++) { if (param.weightLabel[i] == label[j]) { break; } } if (j == nr_class) { throw new IllegalArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found"); } weighted_C[j] *= param.weight[i]; } // constructing the subproblem Feature[][] x = new Feature[l][]; for (int i = 0; i < l; i++) { x[i] = prob.x[perm[i]]; } Problem sub_prob = new Problem(); sub_prob.l = l; sub_prob.n = n; sub_prob.x = new Feature[sub_prob.l][]; sub_prob.y = new double[sub_prob.l]; for (int k = 0; k < sub_prob.l; k++) { sub_prob.x[k] = x[k]; } // multi-class svm by Crammer and Singer if (param.solverType == SolverType.MCSVM_CS) { model.w = new double[n * nr_class]; for (int i = 0; i < nr_class; i++) { for (int j = start[i]; j < start[i] + count[i]; j++) { sub_prob.y[j] = i; } } SolverMCSVM_CS solver = new SolverMCSVM_CS(sub_prob, nr_class, weighted_C, param.eps); solver.solve(model.w); } else { if (nr_class == 2) { model.w = new double[w_size]; int e0 = start[0] + count[0]; int k = 0; for (; k < e0; k++) { sub_prob.y[k] = +1; } for (; k < sub_prob.l; k++) { sub_prob.y[k] = -1; } train_one(sub_prob, param, model.w, weighted_C[0], weighted_C[1]); } else { model.w = new double[w_size * nr_class]; double[] w = new double[w_size]; for (int i = 0; i < nr_class; i++) { int si = start[i]; int ei = si + count[i]; int k = 0; for (; k < si; k++) { sub_prob.y[k] = -1; } for (; k < ei; k++) { sub_prob.y[k] = +1; } for (; k < sub_prob.l; k++) { sub_prob.y[k] = -1; } train_one(sub_prob, param, w, weighted_C[i], param.C); for (int j = 0; j < n; j++) { model.w[j * nr_class + i] = w[j]; } } } } } return model; } /** * verify the size and throw an exception early if the problem is too large */ private static void checkProblemSize(int n, int nr_class) { if (n >= Integer.MAX_VALUE / nr_class || n * nr_class < 0) { throw new IllegalArgumentException("'number of classes' * 'number of instances' is too large: " + nr_class + "*" + n); } } private static void train_one(Problem prob, Parameter param, double[] w, double Cp, double Cn) { double eps = param.eps; int pos = 0; for (int i = 0; i < prob.l; i++) { if (prob.y[i] > 0) { pos++; } } int neg = prob.l - pos; double primal_solver_tol = eps * Math.max(Math.min(pos, neg), 1) / prob.l; Function fun_obj = null; switch (param.solverType) { case L2R_LR: { double[] C = new double[prob.l]; for (int i = 0; i < prob.l; i++) { if (prob.y[i] > 0) { C[i] = Cp; } else { C[i] = Cn; } } fun_obj = new L2R_LrFunction(prob, C); Tron tron_obj = new Tron(fun_obj, primal_solver_tol); tron_obj.tron(w); break; } case L2R_L2LOSS_SVC: { double[] C = new double[prob.l]; for (int i = 0; i < prob.l; i++) { if (prob.y[i] > 0) { C[i] = Cp; } else { C[i] = Cn; } } fun_obj = new L2R_L2_SvcFunction(prob, C); Tron tron_obj = new Tron(fun_obj, primal_solver_tol); tron_obj.tron(w); break; } case L2R_L2LOSS_SVC_DUAL: solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.L2R_L2LOSS_SVC_DUAL); break; case L2R_L1LOSS_SVC_DUAL: solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, SolverType.L2R_L1LOSS_SVC_DUAL); break; case L1R_L2LOSS_SVC: { Problem prob_col = transpose(prob); solve_l1r_l2_svc(prob_col, w, primal_solver_tol, Cp, Cn); break; } case L1R_LR: { Problem prob_col = transpose(prob); solve_l1r_lr(prob_col, w, primal_solver_tol, Cp, Cn); break; } case L2R_LR_DUAL: solve_l2r_lr_dual(prob, w, eps, Cp, Cn); break; case L2R_L2LOSS_SVR: { double[] C = new double[prob.l]; for (int i = 0; i < prob.l; i++) { C[i] = param.C; } fun_obj = new L2R_L2_SvrFunction(prob, C, param.p); Tron tron_obj = new Tron(fun_obj, param.eps); tron_obj.tron(w); break; } case L2R_L1LOSS_SVR_DUAL: case L2R_L2LOSS_SVR_DUAL: solve_l2r_l1l2_svr(prob, w, param); break; default: throw new IllegalStateException("unknown solver type: " + param.solverType); } } public static void disableDebugOutput() { setDebugOutput(null); } public static void enableDebugOutput() { setDebugOutput(System.out); } public static void setDebugOutput(PrintStream debugOutput) { synchronized (OUTPUT_MUTEX) { DEBUG_OUTPUT = debugOutput; } } /** * resets the PRNG * * this is i.a. needed for regression testing (eg. the Weka wrapper) */ public static void resetRandom() { random = new Random(DEFAULT_RANDOM_SEED); } }