/*
 * Encog(tm) Core v3.4 - Java Version
 * http://www.heatonresearch.com/encog/
 * https://github.com/encog/encog-java-core
 *
 * Copyright 2008-2016 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.neural.networks.training.pnn;

import org.encog.Encog;
import org.encog.util.EngineArray;
import org.encog.util.logging.EncogLogging;

/**
 * This class determines optimal values for multiple sigmas in a PNN kernel.
 * This is done using a CJ (conjugate gradient) method.
 *
 * Some of the algorithms in this class are based on C++ code from:
 *
 * Advanced Algorithms for Neural Networks: A C++ Sourcebook by Timothy Masters
 * John Wiley and Sons Inc (Computers); April 3, 1995; ISBN: 0471105880
 */
public class DeriveMinimum {
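	/*
	 * Usage sketch (illustrative; the names criteria and numSigmas below are
	 * assumptions, not taken from this file): the caller supplies a
	 * CalculationCriteria implementation -- in Encog, TrainBasicPNN is one
	 * such class -- plus five scratch vectors of length n, one entry per
	 * sigma being optimized:
	 *
	 *   DeriveMinimum dm = new DeriveMinimum();
	 *   int n = numSigmas;                    // e.g. one sigma per input
	 *   double[] x = new double[n];           // the sigmas being optimized
	 *   double[] base = new double[n];
	 *   double[] direc = new double[n];
	 *   double[] g = new double[n];
	 *   double[] h = new double[n];
	 *   double[] deriv2 = new double[n];
	 *   double best = dm.calculate(32, 0.0, 1.e-10, 1.e-8, criteria, n,
	 *           x, 1.e30, base, direc, g, h, deriv2);
	 */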
	/**
	 * Derive the minimum, using a conjugate gradient method.
	 *
	 * @param maxIterations
	 *            The max iterations.
	 * @param maxError
	 *            Stop at this error rate.
	 * @param eps
	 *            The machine's precision.
	 * @param tol
	 *            The convergence tolerance.
	 * @param network
	 *            The network to get the error from.
	 * @param n
	 *            The number of variables.
	 * @param x
	 *            The independent variable (the sigmas being optimized).
	 * @param ystart
	 *            The starting value for y (not referenced by the current
	 *            implementation).
	 * @param base
	 *            Work vector, must have n elements.
	 * @param direc
	 *            Work vector, must have n elements.
	 * @param g
	 *            Work vector, must have n elements.
	 * @param h
	 *            Work vector, must have n elements.
	 * @param deriv2
	 *            Work vector, must have n elements.
	 * @return The best error.
	 */
	public double calculate(final int maxIterations, final double maxError,
			final double eps, final double tol,
			final CalculationCriteria network, final int n, final double[] x,
			final double ystart, final double[] base, final double[] direc,
			final double[] g, final double[] h, final double[] deriv2) {
		double prevBest, toler, gam, improvement;
		final GlobalMinimumSearch globalMinimum = new GlobalMinimumSearch();

		// Evaluate the error at the starting point; the gradient is written
		// into direc and the second derivatives into deriv2.
		double fbest = network.calcErrorWithMultipleSigma(x, direc, deriv2,
				true);
		prevBest = 1.e30;

		// Begin by searching along the negative gradient.
		for (int i = 0; i < n; i++) {
			direc[i] = -direc[i];
		}

		EngineArray.arrayCopy(direc, g);
		EngineArray.arrayCopy(direc, h);

		int convergenceCounter = 0;
		int poorCJ = 0;

		// Main loop
		for (int iteration = 0; iteration < maxIterations; iteration++) {
			if (fbest < maxError) {
				break;
			}

			EncogLogging.log(EncogLogging.LEVEL_INFO,
					"Beginning internal iteration #" + iteration
							+ ", currentError=" + fbest + ", target="
							+ maxError);

			// Check for convergence
			if (prevBest <= 1.0) {
				toler = tol;
			} else {
				toler = tol * prevBest;
			}

			// Stop if there is little improvement
			if ((prevBest - fbest) <= toler) {
				if (++convergenceCounter >= 3) {
					break;
				}
			} else {
				convergenceCounter = 0;
			}

			double dot1 = 0.0;
			double dot2 = 0.0;
			double dlen = 0.0;
			double high = 1.e-4;

			for (int i = 0; i < n; i++) {
				base[i] = x[i];
				if (deriv2[i] > high) {
					high = deriv2[i];
				}
				dot1 += direc[i] * g[i]; // Directional first derivative
				dot2 += direc[i] * direc[i] * deriv2[i]; // and second
				dlen += direc[i] * direc[i]; // Length of search vector
			}

			dlen = Math.sqrt(dlen);

			// Estimate the step to the minimum from the first and second
			// directional derivatives (a Newton step along the line), then
			// clamp it to a reasonable range.
			double scale;
			if (Math.abs(dot2) < Encog.DEFAULT_DOUBLE_EQUAL) {
				scale = 0;
			} else {
				scale = dot1 / dot2;
			}

			high = 1.5 / high;
			if (high < 1.e-4) {
				high = 1.e-4;
			}

			if (scale < 0.0) {
				scale = high;
			} else if (scale < 0.1 * high) {
				scale = 0.1 * high;
			} else if (scale > 10.0 * high) {
				scale = 10.0 * high;
			}

			prevBest = fbest;
			globalMinimum.setY2(fbest);

			globalMinimum.findBestRange(0.0, 2.0 * scale, -3, false, maxError,
					network);

			if (globalMinimum.getY2() < maxError) {
				if (globalMinimum.getY2() < fbest) {
					for (int i = 0; i < n; i++) {
						// Step to the best point found; x2 is the abscissa of
						// the minimum along direc.
						x[i] = base[i] + globalMinimum.getX2() * direc[i];
						if (x[i] < 1.e-10) {
							x[i] = 1.e-10;
						}
					}
					fbest = globalMinimum.getY2();
				} else {
					System.arraycopy(base, 0, x, 0, n);
				}
				break;
			}

			// Refine the line search with Brent's method; search harder once
			// progress has begun to stall.
			if (convergenceCounter > 0) {
				fbest = globalMinimum.brentmin(20, maxError, eps, 1.e-7,
						network, globalMinimum.getY2());
			} else {
				fbest = globalMinimum.brentmin(10, maxError, 1.e-6, 1.e-5,
						network, globalMinimum.getY2());
			}

			for (int i = 0; i < n; i++) {
				x[i] = base[i] + globalMinimum.getX2() * direc[i];
				if (x[i] < 1.e-10) {
					x[i] = 1.e-10;
				}
			}

			improvement = (prevBest - fbest) / prevBest;

			if (fbest < maxError) {
				break;
			}

			for (int i = 0; i < n; i++) {
				direc[i] = -direc[i]; // negative gradient
			}

			gam = gamma(n, g, direc);

			if (gam < 0.0) {
				gam = 0.0;
			}

			if (gam > 10.0) {
				gam = 10.0;
			}

			if (improvement < 0.001) {
				++poorCJ;
			} else {
				poorCJ = 0;
			}

			if (poorCJ >= 2) {
				if (gam > 1.0) {
					gam = 1.0;
				}
			}

			if (poorCJ >= 6) {
				// Too many weak iterations: reset to steepest descent.
				poorCJ = 0;
				gam = 0.0;
			}

			findNewDir(n, gam, g, h, direc);
		}

		return fbest;
	}

	/**
	 * Find the new conjugate search direction for the next iteration.
	 *
	 * @param n
	 *            The number of variables.
	 * @param gam
	 *            The gamma value.
	 * @param g
	 *            The "g" value, used for the CJ algorithm.
	 * @param h
	 *            The "h" value, used for the CJ algorithm.
	 * @param grad
	 *            The gradients.
	 */
	private void findNewDir(final int n, final double gam, final double[] g,
			final double[] h, final double[] grad) {
		int i;

		// Save the current (negated) gradient into g, then set both h and
		// the new search direction to g + gam * h.
		System.arraycopy(grad, 0, g, 0, n);
		for (i = 0; i < n; i++) {
			grad[i] = h[i] = g[i] + gam * h[i];
		}
	}
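	/*
	 * Together, findNewDir() above and gamma() below implement the standard
	 * conjugate gradient direction update. With grad holding the new negated
	 * gradient and g the previous one, the new direction is
	 *
	 *   direc = h = grad + gamma * h
	 *
	 * where gamma is the Polak-Ribiere coefficient computed by gamma().
	 * Forcing gamma to zero (as calculate() does after repeated poor
	 * iterations) restarts the sequence as plain steepest descent.
	 */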
	/**
	 * Find the gamma correction for the next iteration. This is the
	 * Polak-Ribiere form: the dot product of the gradient change with the new
	 * gradient, divided by the squared length of the previous gradient.
	 *
	 * @param n
	 *            The number of variables.
	 * @param g
	 *            The "g" value (previous gradient), used for the CJ
	 *            algorithm.
	 * @param grad
	 *            The gradients.
	 * @return The correction for the next iteration.
	 */
	private double gamma(final int n, final double[] g, final double[] grad) {
		int i;
		double denom, numer;

		numer = denom = 0.0;

		for (i = 0; i < n; i++) {
			denom += g[i] * g[i];
			numer += (grad[i] - g[i]) * grad[i]; // Grad is neg gradient
		}

		if (denom == 0.0) {
			return 0.0;
		} else {
			return numer / denom;
		}
	}
}