/*
 * Apache License
 * Version 2.0, January 2004
 * http://www.apache.org/licenses/
 *
 * Copyright 2013 Aurelian Tutuianu
 * Copyright 2014 Aurelian Tutuianu
 * Copyright 2015 Aurelian Tutuianu
 * Copyright 2016 Aurelian Tutuianu
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package rapaio.ml.classifier.svm;

/**
 * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> at 1/16/15.
 */

import rapaio.math.MathTools;
import rapaio.core.RandomSource;
import rapaio.data.Frame;
import rapaio.data.Mapping;
import rapaio.data.Var;
import rapaio.data.VarType;
import rapaio.data.filter.FFilter;
import rapaio.data.sample.Sample;
import rapaio.data.sample.RowSampler;
import rapaio.ml.classifier.AbstractClassifier;
import rapaio.ml.classifier.CFit;
import rapaio.ml.classifier.Classifier;
import rapaio.ml.classifier.svm.kernel.Kernel;
import rapaio.ml.classifier.svm.kernel.PolyKernel;
import rapaio.ml.common.Capabilities;

import java.io.Serializable;
import java.util.BitSet;
import java.util.List;

import static rapaio.sys.WS.formatFlex;

/**
 * Class for building a binary support vector machine.
 * <p>
 * Training follows the SMO (sequential minimal optimization) scheme with the
 * dual-threshold bookkeeping ("Modification 2" from Keerthi et al.'s paper,
 * as noted in {@code coreTrain}): two thresholds {@code bUp}/{@code bLow}
 * with indices {@code iUp}/{@code iLow}, an error cache {@code fCache}, and
 * five index sets I0..I4 partitioning the examples by class and by whether
 * their Lagrange multiplier is at a bound.
 * <p>
 * When the kernel is linear the model is collapsed after training into a
 * sparse weight vector ({@code sparseWeights}/{@code sparseIndices}) and the
 * training data, alphas and support-vector set are discarded.
 */
public class BinarySMO extends AbstractClassifier implements Serializable {

    private static final long serialVersionUID = 1208515184777030598L;

    protected double[] alpha; // Lagrange multipliers from dual
    protected double b, bLow, bUp; // thresholds
    protected int iLow, iUp; // indices for bLow and bUp

    /**
     * Variables to hold weight vector in sparse form.
     * (To reduce storage requirements.)
     */
    protected double[] sparseWeights;
    protected int[] sparseIndices;

    // kernel used for dot products; defaults to a degree-1 (linear) polynomial kernel
    protected Kernel kernel = new PolyKernel(1);
    // per-row class encoding: -1 for classIndex1, +1 for the other class (set in coreTrain)
    protected double[] target;
    protected double[] fCache; // The current set of errors for all non-bound examples.

    /* The five different sets used by the algorithm. */
    protected BitSet I0; // i: 0 < alpha[i] < C
    protected BitSet I1; // i: classes[i] = 1, alpha[i] = 0
    protected BitSet I2; // i: classes[i] = -1, alpha[i] =C
    protected BitSet I3; // i: classes[i] = 1, alpha[i] = C
    protected BitSet I4; // i: classes[i] = -1, alpha[i] = 0

    /**
     * The set of support vectors
     */
    protected BitSet supportVectors; // {i: 0 < alpha[i]}

    protected double sumOfWeights = 0;

    // class indices
    private int classIndex1 = 1;
    private int classIndex2 = 2;
    private boolean oneVsAll = false;
    private int maxRuns = Integer.MAX_VALUE;

    private double C = 1.0; // complexity parameter
    private double tol = 1e-3; // tolerance of accuracy

    // references to the (filtered/sampled) training data, kept for kernel evaluations
    private Frame train;
    private Var weights;
    // var index of the first target inside the training frame
    private int targetIndex;

    /**
     * Weight vector for linear machine.
     */
    private double[] linear_weights;

    /**
     * @return short classifier name
     */
    @Override
    public String name() {
        return "BinarySMO";
    }

    /**
     * @return name plus a multi-line listing of all configured parameters
     */
    @Override
    public String fullName() {
        return name() + "\n" +
                "{\n" +
                " sampler=" + sampler().name() + ",\n" +
                " kernel=" + kernel.name() + ",\n" +
                " C=" + formatFlex(C) + ",\n" +
                " tol=" + formatFlex(tol) + ",\n" +
                " classIndex1=" + classIndex1 + ",\n" +
                " classIndex2=" + classIndex2 + ",\n" +
                " oneVsAll=" + oneVsAll + ",\n" +
                " maxRuns=" + maxRuns + "\n" +
                "}\n";
    }

    /**
     * Builds a new, untrained classifier carrying over all configuration
     * (sampler, kernel, C, tol, class indices, oneVsAll, maxRuns, input filters).
     */
    @Override
    public Classifier newInstance() {
        return new BinarySMO()
                .withSampler(sampler())
                .withKernel(kernel.newInstance())
                .withC(C)
                .withTol(tol)
                .withFirstClassIndex(classIndex1)
                .withSecondClassIndex(classIndex2)
                .withOneVsAll(oneVsAll)
                .withMaxRuns(maxRuns)
                .withInputFilters(inputFilters());
    }

    /**
     * Sets the kernel used for similarity computations.
     */
    public BinarySMO withKernel(Kernel value) {
        kernel = value;
        return this;
    }

    /**
     * Sets the complexity (regularization) parameter C.
     */
    public BinarySMO withC(double c) {
        this.C = c;
        return this;
    }

    /**
     * Sets the tolerance used in the optimality checks of the optimizer.
     */
    public BinarySMO withTol(double tol) {
        this.tol = tol;
        return this;
    }

    /**
     * Set target index of the first class.
     */
    public BinarySMO withFirstClassIndex(int classIndex1) {
        this.classIndex1 = classIndex1;
        return this;
    }

    /**
     * Sets target index of the second class, used only when
     * oneVsAll if false
     */
    public BinarySMO withSecondClassIndex(int classIndex2) {
        this.classIndex2 = classIndex2;
        return this;
    }

    /**
     * If true then first class index is classified against
     * all other target classes, if false than first class index
     * is classified against second class index.
     *
     * @param oneVsAll true for one vs all classification
     */
    public BinarySMO withOneVsAll(boolean oneVsAll) {
        this.oneVsAll = oneVsAll;
        return this;
    }

    /**
     * Caps the number of outer iterations of the training loop.
     * NOTE(review): the loop body executes before the counter is checked and
     * decremented, so maxRuns = k allows k + 1 outer iterations — confirm intended.
     */
    public BinarySMO withMaxRuns(int maxRuns) {
        this.maxRuns = maxRuns;
        return this;
    }

    @Override
    public BinarySMO withSampler(RowSampler sampler) {
        return (BinarySMO) super.withSampler(sampler);
    }

    /**
     * Declares accepted input/target types and counts; missing values are not
     * allowed, and multiple targets are accepted only in oneVsAll mode.
     */
    @Override
    public Capabilities capabilities() {
        return new Capabilities()
                .withInputTypes(VarType.BINARY, VarType.INDEX, VarType.NOMINAL, VarType.NUMERIC)
                .withInputCount(1, 100_000)
                .withAllowMissingInputValues(false)
                .withTargetTypes(VarType.NOMINAL)
                .withTargetCount(1, oneVsAll ? 10_000 : 1)
                .withAllowMissingTargetValues(false);
    }

    /**
     * Trains the SMO optimizer on the given frame.
     * <p>
     * Steps: (1) restrict rows to the two chosen classes (unless oneVsAll),
     * (2) sample rows, (3) reset optimizer state, (4) run the SMO outer loop
     * alternating a full randomized sweep with Keerthi Modification-2 inner
     * steps on (iUp, iLow), (5) set the final threshold b, and (6) for linear
     * kernels compress the weight vector to sparse form and drop training state.
     *
     * @param df      training frame
     * @param weights per-row weights (scales C per instance in takeStep)
     * @return true if a model was built; false when one or both classes are absent
     */
    @Override
    protected boolean coreTrain(Frame df, Var weights) {

        // process classes

        if (classIndex1 == classIndex2) {
            throw new IllegalArgumentException("Indexes for first and second target " +
                    "class labels are equal, which is not allowed.");
        }

        if (!oneVsAll) {
            // keep only rows belonging to the two classes under consideration
            Mapping map = df
                    .stream()
                    .filter(s -> s.index(firstTargetName()) == classIndex1 || s.index(firstTargetName()) == classIndex2)
                    .collectMapping();
            df = df.mapRows(map);
            weights = weights.mapRows(map);
        }

        // perform sampling

        Sample sample = sampler().nextSample(df, weights);
        df = sample.df;
        weights = sample.weights;

        if (df.rowCount() == 0) {
            throw new IllegalArgumentException("After filtering other classes, there " +
                    "were no other rows remained");
        }

        // Initialize some variables

        // Set the reference to the data
        this.train = df;
        this.weights = weights;

        bUp = -1;
        bLow = 1;
        b = 0;
        alpha = null;
        linear_weights = null;
        fCache = null;
        I0 = null;
        I1 = null;
        I2 = null;
        I3 = null;
        I4 = null;
        sparseWeights = null;
        sparseIndices = null;

        // Store the sum of weights
        sumOfWeights = weights.stream().mapToDouble().sum();

        // Set class values: classIndex1 -> -1, anything else -> +1
        target = new double[train.rowCount()];
        iUp = -1;
        iLow = -1;
        for (int i = 0; i < train.rowCount(); i++) {
            if (df.var(firstTargetName()).index(i) == classIndex1) {
                target[i] = -1;
                iLow = i;
            } else {
                target[i] = 1;
                iUp = i;
            }
        }

        // Check whether one or both classes are missing

        if ((iUp == -1) || (iLow == -1)) {
            // degenerate model: predict the constant threshold for the single present class
            if (iUp != -1) {
                b = -1;
            } else if (iLow != -1) {
                b = 1;
            } else {
                target = null;
                return false;
            }
            if (kernel.isLinear()) {
                sparseWeights = new double[0];
                sparseIndices = new int[0];
                target = null;
            } else {
                supportVectors = new BitSet(0);
                alpha = new double[0];
                target = new double[0];
            }
            return false;
        }

        targetIndex = df.varIndex(firstTargetName());

        // If machine is linear, reserve space for weights
        if (kernel.isLinear()) {
            linear_weights = new double[inputNames().length];
        } else {
            linear_weights = null;
        }

        // Initialize alpha array to zero
        alpha = new double[df.rowCount()];

        // Initialize sets
        supportVectors = new BitSet(df.rowCount());
        I0 = new BitSet(df.rowCount());
        I1 = new BitSet(df.rowCount());
        I2 = new BitSet(df.rowCount());
        I3 = new BitSet(df.rowCount());
        I4 = new BitSet(df.rowCount());

        // Clean out some instance variables
        sparseWeights = null;
        sparseIndices = null;

        // init kernel
        kernel.buildKernel(inputNames(), df);

        // Initialize error cache
        fCache = new double[df.rowCount()];
        fCache[iLow] = 1;
        fCache[iUp] = -1;

        // Build up I1 and I4 (all alphas are 0 at this point)
        for (int i = 0; i < train.rowCount(); i++) {
            if (target[i] == 1) {
                I1.set(i, true);
            } else {
                I4.set(i, true);
            }
        }

        // Loop to find all the support vectors
        int numChanged = 0;
        boolean examineAll = true;

        int runs = maxRuns;
        while (numChanged > 0 || examineAll) {
            numChanged = 0;
            if (examineAll) {
                // add random as an additional step
                // sweep all rows starting from a random offset
                int offset = RandomSource.nextInt(train.rowCount());
                for (int i = offset; i < train.rowCount() + offset; i++) {
                    int pos = i;
                    if (pos >= train.rowCount())
                        pos -= train.rowCount();
                    if (examineExample(pos)) {
                        numChanged++;
                    }
                }
            } else {

                // This code implements Modification 1 from Keerthi et al.'s paper
//                int offset = RandomSource.nextInt(train.rowCount());
//                for (int i = offset; i < train.rowCount() + offset; i++) {
//
//                    int pos = i;
//                    if (pos >= train.rowCount())
//                        pos -= train.rowCount();
//
//                    if (alpha[pos] > 0 && alpha[pos] < C * weights.value(pos)) {
//                        if (examineExample(pos)) {
//                            numChanged++;
//                        }
////                         Is optimality on unbound vectors obtained?
//                        if (bUp > bLow - 2 * tol) {
//                            numChanged = 0;
//                            break;
//                        }
//                    }
//                }

                //This is the code for Modification 2 from Keerthi et al.'s paper
                boolean innerLoopSuccess = true;
                numChanged = 0;
                // repeatedly optimize the current most-violating pair (iUp, iLow)
                while ((bUp < bLow - 2 * tol) && innerLoopSuccess) {
                    innerLoopSuccess = takeStep(iUp, iLow);
                    if (innerLoopSuccess) {
                        numChanged++;
                    }
                }
            }

            if (examineAll) {
                examineAll = false;
            } else if (numChanged == 0) {
                examineAll = true;
            }
            // hard cap on outer iterations (see withMaxRuns note)
            if (runs == 0) {
                break;
            }
            runs--;
        }

        // Set threshold
        b = (bLow + bUp) / 2.0;

        // Save memory
        kernel.clean();

        fCache = null;
        I0 = I1 = I2 = I3 = I4 = null;

        // If machine is linear, delete training data
        // and store weight vector in sparse format
        if (kernel.isLinear()) {

            // We don't need to store the set of support vectors
            supportVectors = null;

            // We don't need to store the class values either
            target = null;

            // Convert weight vector (locals intentionally shadow the fields
            // until the compacted copies are stored below)
            double[] sparseWeights = new double[linear_weights.length];
            int[] sparseIndices = new int[linear_weights.length];
            int counter = 0;
            for (int i = 0; i < linear_weights.length; i++) {
                if (linear_weights[i] != 0.0) {
                    sparseWeights[counter] = linear_weights[i];
                    sparseIndices[counter] = i;
                    counter++;
                }
            }
            this.sparseWeights = new double[counter];
            this.sparseIndices = new int[counter];
            System.arraycopy(sparseWeights, 0, this.sparseWeights, 0, counter);
            System.arraycopy(sparseIndices, 0, this.sparseIndices, 0, counter);

            // Clean out weight vector
            linear_weights = null;

            // We don't need the alphas in the linear case
            alpha = null;
        }
        return true;
    }

    /**
     * Classifies each row of the frame by the sign of the raw SVM output:
     * negative -> classIndex1, non-negative -> classIndex2.
     * <p>
     * NOTE(review): both branches write the same density values (-pred for
     * classIndex1, pred for classIndex2), so one of the two densities is
     * always negative; only the predicted class differs between branches.
     * Confirm this "distance variant" output is intended.
     */
    @Override
    protected CFit coreFit(Frame df, boolean withClasses, boolean withDistributions) {
        CFit cr = CFit.build(this, df, withClasses, withDistributions);
        for (int i = 0; i < df.rowCount(); i++) {
            double pred = predict(df, i);

            // TODO generalize
//            pred = 1.0 / (1.0 + Math.exp(-pred));
//
//            cr.firstClasses().setIndex(i, (pred < 0.5) ? classIndex1 : classIndex2);
//            cr.firstDensity().setValue(i, firstTargetLevel(classIndex1), 1 - pred);
//            cr.firstDensity().setValue(i, firstTargetLevel(classIndex2), pred);

            // this is the old distance variant
            if (pred < 0) {
                cr.firstClasses().setIndex(i, classIndex1);
                cr.firstDensity().setValue(i, firstTargetLevel(classIndex1), -pred);
                cr.firstDensity().setValue(i, firstTargetLevel(classIndex2), pred);
            } else {
                cr.firstClasses().setIndex(i, classIndex2);
                cr.firstDensity().setValue(i, firstTargetLevel(classIndex1), -pred);
                cr.firstDensity().setValue(i, firstTargetLevel(classIndex2), pred);
            }
        }
        return cr;
    }

    /**
     * Computes SVM output for given instance.
     * <p>
     * Linear machines use the (possibly sparse) stored weight vector;
     * otherwise the decision value is the kernel expansion over support vectors.
     * NOTE(review): positions are compared against {@code targetIndex} (a var
     * index in the training frame) to skip the target column — confirm input
     * positions and frame var indices line up for the frames passed here.
     */
    protected double predict(Frame df, int row) {

        double result = 0;

        // Is the machine linear?
        if (kernel.isLinear()) {

            // Is weight vector stored in sparse format?
            if (sparseWeights == null) {
                int n1 = inputNames().length;
                for (int p = 0; p < n1; p++) {
                    if (p != targetIndex) {
                        result += linear_weights[p] * df.value(row, p);
                    }
                }
            } else {
                // merge-walk dense positions p1 against sparse indices p2
                int n1 = inputNames().length;
                int n2 = sparseWeights.length;
                for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2; ) {
                    int ind1 = p1;
                    int ind2 = sparseIndices[p2];
                    if (ind1 == ind2) {
                        if (ind1 != targetIndex) {
                            result += df.value(row, p1) * sparseWeights[p2];
                        }
                        p1++;
                        p2++;
                    } else if (ind1 > ind2) {
                        p2++;
                    } else {
                        p1++;
                    }
                }
            }
        } else {
            for (int i = supportVectors.nextSetBit(0); i != -1; i = supportVectors.nextSetBit(i + 1)) {
                result += target[i] * alpha[i] * kernel.compute(train, i, df, row);
            }
        }
        result -= b;
        return result;
    }

    /**
     * Examines instance.
     *
     * @param i2 index of instance to examine
     * @return true if examination was successful
     */
    protected boolean examineExample(int i2) {

        double y2 = target[i2];
        double F2;

        // F2 is the cached error for unbound examples; recompute it otherwise
        if (I0.get(i2)) {
            F2 = fCache[i2];
        } else {
            F2 = predict(train, i2) + b - y2;
            fCache[i2] = F2;

            // Update thresholds
            if ((I1.get(i2) || I2.get(i2)) && (F2 < bUp)) {
                bUp = F2;
                iUp = i2;
            } else if ((I3.get(i2) || I4.get(i2)) && (F2 > bLow)) {
                bLow = F2;
                iLow = i2;
            }
        }

        int i1 = -1;

        // Check optimality using current bLow and bUp and, if
        // violated, find an index i1 to do joint optimization
        // with i2...
        boolean optimal = true;
        if (I0.get(i2) || I1.get(i2) || I2.get(i2)) {
            if (bLow - F2 > 2 * tol) {
                optimal = false;
                i1 = iLow;
            }
        }
        if (I0.get(i2) || I3.get(i2) || I4.get(i2)) {
            if (F2 - bUp > 2 * tol) {
                optimal = false;
                i1 = iUp;
            }
        }
        if (optimal) {
            return false;
        }

        // For i2 unbound choose the better i1...
        if (I0.get(i2)) {
            if (bLow - F2 > F2 - bUp) {
                i1 = iLow;
            } else {
                i1 = iUp;
            }
        }
        if (i1 == -1) {
            throw new RuntimeException("This should never happen!");
        }
        return takeStep(i1, i2);
    }

    /**
     * Method solving for the Lagrange multipliers for
     * two instances.
     *
     * @param i1 index of the first instance
     * @param i2 index of the second instance
     * @return true if multipliers could be found
     */
    protected boolean takeStep(int i1, int i2) {

        // Don't do anything if the two instances are the same
        if (i1 == i2) {
            return false;
        }

        // Initialize variables
        double alph1 = alpha[i1];
        double alph2 = alpha[i2];
        double y1 = target[i1];
        double y2 = target[i2];
        double F1 = fCache[i1];
        double F2 = fCache[i2];
        double s = y1 * y2;

        // per-instance box constraints scaled by row weights
        double C1 = C * weights.value(i1);
        double C2 = C * weights.value(i2);

        double L, H;

        // Find the constraints on a2
        if (y1 != y2) {
            L = Math.max(0, alph2 - alph1);
            H = Math.min(C2, C1 + alph2 - alph1);
        } else {
            L = Math.max(0, alph1 + alph2 - C1);
            H = Math.min(C2, alph1 + alph2);
        }
        if (MathTools.eq(L, H, 1e-10)) {
            // old condition was >=
            return false;
        }

        // Compute second derivative of objective function
        double k11 = kernel.compute(train, i1, train, i1);
        double k12 = kernel.compute(train, i1, train, i2);
        double k22 = kernel.compute(train, i2, train, i2);
        double eta = 2 * k12 - k11 - k22;

        double a1, a2;

        // Check if second derivative is negative
        double eps = 1e-12;
        if (eta < 0) {

            // Compute unconstrained maximum
            a2 = alph2 - y2 * (F1 - F2) / eta;

            // Compute constrained maximum
            if (a2 < L) {
                a2 = L;
            } else if (a2 > H) {
                a2 = H;
            }
        } else {

            // Look at endpoints of diagonal
            double f1 = predict(train, i1);
            double f2 = predict(train, i2);
            double v1 = f1 + b - y1 * alph1 * k11 - y2 * alph2 * k12;
            double v2 = f2 + b - y1 * alph1 * k12 - y2 * alph2 * k22;
            double gamma = alph1 + s * alph2;
            double Lobj = (gamma - s * L) + L - 0.5 * k11 * (gamma - s * L) * (gamma - s * L)
                    - 0.5 * k22 * L * L - s * k12 * (gamma - s * L) * L
                    - y1 * (gamma - s * L) * v1 - y2 * L * v2;
            double Hobj = (gamma - s * H) + H - 0.5 * k11 * (gamma - s * H) * (gamma - s * H)
                    - 0.5 * k22 * H * H - s * k12 * (gamma - s * H) * H
                    - y1 * (gamma - s * H) * v1 - y2 * H * v2;

            if (Lobj > Hobj + eps) {
                a2 = L;
            } else if (Lobj < Hobj - eps) {
                a2 = H;
            } else {
                a2 = alph2;
            }
        }
        // reject steps too small to make progress
        if (Math.abs(a2 - alph2) < eps * (a2 + alph2 + eps)) {
            return false;
        }

        // To prevent precision problems
        double m_Del = 1000 * Double.MIN_VALUE;
        if (a2 > C2 - m_Del * C2) {
            a2 = C2;
        } else if (a2 <= m_Del * C2) {
            a2 = 0;
        }

        // Recompute a1
        a1 = alph1 + s * (alph2 - a2);

        // To prevent precision problems
        if (a1 > C1 - m_Del * C1) {
            a1 = C1;
        } else if (a1 <= m_Del * C1) {
            a1 = 0;
        }

        // Update sets: re-classify i1 into supportVectors / I0..I4 from its new alpha
        if (a1 > 0) {
            supportVectors.set(i1, true);
        } else {
            supportVectors.set(i1, false);
        }
        if ((a1 > 0) && (a1 < C1)) {
            I0.set(i1, true);
        } else {
            I0.set(i1, false);
        }
        if ((y1 == 1) && (a1 == 0)) {
            I1.set(i1, true);
        } else {
            I1.set(i1, false);
        }
        if ((y1 == -1) && (a1 == C1)) {
            I2.set(i1, true);
        } else {
            I2.set(i1, false);
        }
        if ((y1 == 1) && (a1 == C1)) {
            I3.set(i1, true);
        } else {
            I3.set(i1, false);
        }
        if ((y1 == -1) && (a1 == 0)) {
            I4.set(i1, true);
        } else {
            I4.set(i1, false);
        }
        // same re-classification for i2
        if (a2 > 0) {
            supportVectors.set(i2, true);
        } else {
            supportVectors.set(i2, false);
        }
        if ((a2 > 0) && (a2 < C2)) {
            I0.set(i2, true);
        } else {
            I0.set(i2, false);
        }
        if ((y2 == 1) && (a2 == 0)) {
            I1.set(i2, true);
        } else {
            I1.set(i2, false);
        }
        if ((y2 == -1) && (a2 == C2)) {
            I2.set(i2, true);
        } else {
            I2.set(i2, false);
        }
        if ((y2 == 1) && (a2 == C2)) {
            I3.set(i2, true);
        } else {
            I3.set(i2, false);
        }
        if ((y2 == -1) && (a2 == 0)) {
            I4.set(i2, true);
        } else {
            I4.set(i2, false);
        }

        // Update weight vector to reflect change a1 and a2, if linear SVM
        if (kernel.isLinear()) {
            for (int p1 = 0; p1 < inputNames().length; p1++) {
                if (p1 != targetIndex) {
                    linear_weights[p1] += y1 * (a1 - alph1) * train.value(i1, p1);
                }
            }
            for (int p2 = 0; p2 < inputNames().length; p2++) {
                if (p2 != targetIndex) {
                    linear_weights[p2] += y2 * (a2 - alph2) * train.value(i2, p2);
                }
            }
        }

        // Update error cache using new Lagrange multipliers
        for (int j = I0.nextSetBit(0); j != -1; j = I0.nextSetBit(j + 1)) {
            if ((j != i1) && (j != i2)) {
                fCache[j] += y1 * (a1 - alph1) * kernel.compute(train, i1, train, j)
                        + y2 * (a2 - alph2) * kernel.compute(train, i2, train, j);
            }
        }

        // Update error cache for i1 and i2
        fCache[i1] += y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
        fCache[i2] += y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;

        // Update array with Lagrange multipliers
        alpha[i1] = a1;
        alpha[i2] = a2;

        // Update thresholds: recompute bUp/bLow over I0, then consider i1/i2
        // if they left I0
        bLow = -Double.MAX_VALUE;
        bUp = Double.MAX_VALUE;
        iLow = -1;
        iUp = -1;
        for (int j = I0.nextSetBit(0); j != -1; j = I0.nextSetBit(j + 1)) {
            if (fCache[j] < bUp) {
                bUp = fCache[j];
                iUp = j;
            }
            if (fCache[j] > bLow) {
                bLow = fCache[j];
                iLow = j;
            }
        }
        if (!I0.get(i1)) {
            if (I3.get(i1) || I4.get(i1)) {
                if (fCache[i1] > bLow) {
                    bLow = fCache[i1];
                    iLow = i1;
                }
            } else {
                if (fCache[i1] < bUp) {
                    bUp = fCache[i1];
                    iUp = i1;
                }
            }
        }
        if (!I0.get(i2)) {
            if (I3.get(i2) || I4.get(i2)) {
                if (fCache[i2] > bLow) {
                    bLow = fCache[i2];
                    iLow = i2;
                }
            } else {
                if (fCache[i2] < bUp) {
                    bUp = fCache[i2];
                    iUp = i2;
                }
            }
        }
        if (iLow == -1 || iUp == -1) {
            throw new RuntimeException("This should never happen!");
        }

        // Made some progress.
        return true;
    }

    /**
     * Renders a human-readable description of the trained model: the
     * configured parameters and the decision function (sparse linear weights
     * for a linear kernel, otherwise the kernel expansion over support vectors).
     */
    @Override
    public String summary() {
        StringBuilder sb = new StringBuilder();
        int printed = 0;

        if ((alpha == null) && (sparseWeights == null)) {
            sb.append("BinarySMO: No model built yet.\n");
            return sb.toString();
        }
        try {
            sb.append("BinarySMO model\n");
            sb.append("===============\n");
            sb.append("**Parameters**\n");
            sb.append(fullName()).append("\n\n");

            sb.append("**Decision function**\n");

            // If machine linear, print weight vector
            if (kernel.isLinear()) {
                sb.append("Linear support vector: use attribute weights folding instead of kernel dot products.\n");

                // We can assume that the weight vector is stored in sparse
                // format because the classifier has been built
                for (int i = 0; i < sparseWeights.length; i++) {
                    if (sparseIndices[i] != targetIndex) {
                        if (printed > 0) {
                            if (sparseWeights[i] >= 0)
                                sb.append(" + ");
                            else
                                sb.append(" - ");
                        } else {
                            sb.append(" ");
                        }
                        sb.append(formatFlex(Math.abs(sparseWeights[i]))).append(" * ");
                        sb.append("[").append(inputName(sparseIndices[i])).append("]\n");
                        printed++;
                    }
                }
            } else {
                for (int i = 0; i < alpha.length; i++) {
                    if (supportVectors.get(i)) {
                        double val = alpha[i];
                        if (target[i] == 1) {
                            if (printed > 0) {
                                sb.append(" + ");
                            } else {
                                sb.append(" ");
                            }
                        } else {
                            sb.append(" - ");
                        }
                        sb.append(formatFlex(val)).append(" * <[");
                        for (int j = 0; j < inputNames().length; j++) {
                            sb.append(formatFlex(train.value(i, inputNames()[j])));
                            if (j != inputNames().length - 1) {
                                sb.append(",");
                            }
                        }
                        sb.append("], X>\n");
                        printed++;
                    }
                }
            }
            if (b > 0) {
                sb.append(" - ").append(formatFlex(b));
            } else {
                sb.append(" + ").append(formatFlex(-b));
            }

            if (!kernel.isLinear()) {
                sb.append("\n\nNumber of support vectors: ").append(supportVectors.cardinality());
            }
//            int numEval = 0;
//            int numCacheHits = -1;
//            if (kernel != null) {
//                numEval = kernel.numEvals();
//                numCacheHits = kernel.numCacheHits();
//            }
//            text.append("\n\nNumber of kernel evaluations: " + numEval);
//            if (numCacheHits >= 0 && numEval > 0) {
//                double hitRatio = 1 - numEval * 1.0 / (numCacheHits + numEval);
//                text.append(" (" + Utils.doubleToString(hitRatio * 100, 7, 3).trim() + "% cached)");
//            }
        } catch (Exception e) {
            e.printStackTrace();
            sb.append("Can't print BinarySMO classifier.");
        }
        return sb.toString();
    }

    @Override
    public BinarySMO withInputFilters(List<FFilter> filters) {
        return (BinarySMO) super.withInputFilters(filters);
    }

    @Override
    public BinarySMO withInputFilters(FFilter... filters) {
        return (BinarySMO) super.withInputFilters(filters);
    }
}