/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
 * or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.apache.commons.math4.fitting.leastsquares;

import org.apache.commons.math4.fitting.leastsquares.LeastSquaresBuilder;
import org.apache.commons.math4.fitting.leastsquares.LeastSquaresProblem;
import org.apache.commons.math4.linear.ArrayRealVector;
import org.apache.commons.math4.linear.DiagonalMatrix;
import org.apache.commons.math4.linear.RealVector;
import org.apache.commons.math4.stat.descriptive.StatisticalSummary;
import org.apache.commons.math4.stat.descriptive.SummaryStatistics;
import org.apache.commons.math4.util.FastMath;
import org.junit.Assert;
import org.junit.Test;

import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.List;

/**
 * This class demonstrates the main functionality of the
 * {@link LeastSquaresProblem.Evaluation}, common to the
 * optimizer implementations in package
 * {@link org.apache.commons.math4.fitting.leastsquares}.
 * <br>
 * Not enabled by default, as the class name does not end with "Test".
 * <br>
 * Invoke by running
 * <pre><code>
 *  mvn test -Dtest=EvaluationTestValidation
 * </code></pre>
 * or by running
 * <pre><code>
 *  mvn test -Dtest=EvaluationTestValidation -DargLine="-DmcRuns=1234 -server"
 * </code></pre>
 */
public class EvaluationTestValidation {
    /** Number of runs (read from system property "mcRuns", 100 when unset). */
    private static final int MONTE_CARLO_RUNS =
        Integer.parseInt(System.getProperty("mcRuns", "100"));

    /**
     * Using a Monte-Carlo procedure, this test checks the error estimations
     * as provided by the square-root of the diagonal elements of the
     * covariance matrix.
     * <br>
     * The test generates sets of observations, each sampled from
     * a Gaussian distribution.
     * <br>
     * The optimization problem solved is defined in class
     * {@link StraightLineProblem}.
     * <br>
     * The output (on stdout) will be a table summarizing the distribution
     * of parameters generated by the Monte-Carlo process and by the direct
     * estimation provided by the diagonal elements of the covariance matrix.
     */
    @Test
    public void testParametersErrorMonteCarloObservations() {
        // Error on the observations.
        final double yError = 15;

        // True values of the parameters.
        final double slope = 123.456;
        final double offset = -98.765;

        // Samples generator.
        final RandomStraightLinePointGenerator lineGenerator
            = new RandomStraightLinePointGenerator(slope, offset,
                                                   yError,
                                                   -1e3, 1e4,
                                                   138577L);

        // Number of observations.
        final int numObs = 100; // XXX Should be a command-line option.
        // Number of parameters.
        final int numParams = 2;

        // Parameters found for each of Monte-Carlo run.
        final SummaryStatistics[] paramsFoundByDirectSolution = new SummaryStatistics[numParams];
        // Sigma estimations (square-root of the diagonal elements of the
        // covariance matrix), for each Monte-Carlo run.
        final SummaryStatistics[] sigmaEstimate = new SummaryStatistics[numParams];

        // Initialize statistics accumulators.
        for (int i = 0; i < numParams; i++) {
            paramsFoundByDirectSolution[i] = new SummaryStatistics();
            sigmaEstimate[i] = new SummaryStatistics();
        }

        // Point at which the problem is evaluated: the true parameter values.
        final RealVector init = new ArrayRealVector(new double[] { slope, offset }, false);

        // Monte-Carlo (generates many sets of observations).
        final int mcRepeat = MONTE_CARLO_RUNS;
        for (int mcCount = 0; mcCount < mcRepeat; mcCount++) {
            // Observations (a fresh set is drawn on every run).
            final StraightLineProblem problem = createProblem(lineGenerator, yError, numObs);

            // Direct solution (using simple regression).
            final double[] regress = problem.solve();

            // Estimation of the standard deviation (diagonal elements of the
            // covariance matrix).
            final LeastSquaresProblem lsp = builder(problem).build();
            final RealVector sigma = lsp.evaluate(init).getSigma(1e-14);

            // Accumulate statistics.
            for (int i = 0; i < numParams; i++) {
                paramsFoundByDirectSolution[i].addValue(regress[i]);
                sigmaEstimate[i].addValue(sigma.getEntry(i));
            }
        }

        // Print statistics.
        final String line = "--------------------------------------------------------------";
        System.out.println(" True value Mean Std deviation");
        for (int i = 0; i < numParams; i++) {
            System.out.println(line);
            System.out.println("Parameter #" + i);

            StatisticalSummary s = paramsFoundByDirectSolution[i].getSummary();
            System.out.printf(" %+.6e %+.6e %+.6e\n",
                              init.getEntry(i),
                              s.getMean(),
                              s.getStandardDeviation());

            s = sigmaEstimate[i].getSummary();
            System.out.printf("sigma: %+.6e (%+.6e)\n",
                              s.getMean(),
                              s.getStandardDeviation());
        }
        System.out.println(line);

        // Check the error estimation: the spread of the fitted parameters
        // over the Monte-Carlo runs must agree with the mean sigma estimate.
        for (int i = 0; i < numParams; i++) {
            Assert.assertEquals(paramsFoundByDirectSolution[i].getSummary().getStandardDeviation(),
                                sigmaEstimate[i].getSummary().getMean(),
                                8e-2);
        }
    }

    /**
     * In this test, the set of observations is fixed.
     * Using a Monte-Carlo procedure, it generates sets of parameters,
     * and determine the parameter change that will result in the
     * normalized chi-square becoming larger by one than the value from
     * the best fit solution.
     * <br>
     * The optimization problem solved is defined in class
     * {@link StraightLineProblem}.
     * <br>
     * The output (on stdout) will be a list of lines containing:
     * <ul>
     *  <li>slope of the straight line,</li>
     *  <li>intercept of the straight line,</li>
     *  <li>chi-square of the solution defined by the above two values.</li>
     * </ul>
     * The first line of the output is the best solution (lowest
     * chi-square, {@code chi2_b}), followed by a blank line.
     * The remaining lines are separated into two blocks (with a blank
     * line between them); the first block will contain all parameter
     * sets for which
     * {@code chi2 <= chi2_b + 1}
     * and the second block, all sets for which
     * {@code chi2 > chi2_b + 1}.
     * <br>
     * After a final blank line, two lines starting with {@code #} report
     * the sigma vector and the number of sets in the second block.
     */
    @Test
    public void testParametersErrorMonteCarloParameters() {
        // Error on the observations.
        final double yError = 15;

        // True values of the parameters.
        final double slope = 123.456;
        final double offset = -98.765;

        // Samples generator.
        final RandomStraightLinePointGenerator lineGenerator
            = new RandomStraightLinePointGenerator(slope, offset,
                                                   yError,
                                                   -1e3, 1e4,
                                                   13839013L);

        // Number of observations.
        final int numObs = 10;

        // Create a single set of observations.
        final StraightLineProblem problem = createProblem(lineGenerator, yError, numObs);

        // Direct solution (using simple regression).
        final RealVector regress = new ArrayRealVector(problem.solve(), false);

        // Dummy optimizer (to compute the chi-square).
        final LeastSquaresProblem lsp = builder(problem).build();

        // Get chi-square of the best parameters set for the given set of
        // observations.
        final double bestChi2N = getChi2N(lsp, regress);
        final RealVector sigma = lsp.evaluate(regress).getSigma(1e-14);

        // Monte-Carlo (generates a grid of parameters).
        final int mcRepeat = MONTE_CARLO_RUNS;
        final int gridSize = (int) FastMath.sqrt(mcRepeat);

        // Parameters found for each of Monte-Carlo run.
        // Index 0 = slope
        // Index 1 = offset
        // Index 2 = normalized chi2
        final List<double[]> paramsAndChi2 = new ArrayList<>(gridSize * gridSize);

        // The grid spans 10 sigma in each direction, centred on the true values.
        final double slopeRange = 10 * sigma.getEntry(0);
        final double offsetRange = 10 * sigma.getEntry(1);
        final double minSlope = slope - 0.5 * slopeRange;
        final double minOffset = offset - 0.5 * offsetRange;
        final double deltaSlope = slopeRange / gridSize;
        final double deltaOffset = offsetRange / gridSize;
        for (int i = 0; i < gridSize; i++) {
            final double s = minSlope + i * deltaSlope;
            for (int j = 0; j < gridSize; j++) {
                final double o = minOffset + j * deltaOffset;
                final double chi2N = getChi2N(lsp,
                                              new ArrayRealVector(new double[] {s, o}, false));

                paramsAndChi2.add(new double[] {s, o, chi2N});
            }
        }

        // Output (for use with "gnuplot").

        // For plotting separately sets of parameters that have a large chi2.
        final double chi2NPlusOne = bestChi2N + 1;
        int numLarger = 0;

        final String lineFmt = "%+.10e %+.10e   %.8e\n".replace("   ", " ");

        // Point with smallest chi-square.
        System.out.printf(lineFmt, regress.getEntry(0), regress.getEntry(1), bestChi2N);
        System.out.println(); // Empty line.

        // Points within the confidence interval.
        for (double[] d : paramsAndChi2) {
            if (d[2] <= chi2NPlusOne) {
                System.out.printf(lineFmt, d[0], d[1], d[2]);
            }
        }
        System.out.println(); // Empty line.

        // Points outside the confidence interval.
        for (double[] d : paramsAndChi2) {
            if (d[2] > chi2NPlusOne) {
                ++numLarger;
                System.out.printf(lineFmt, d[0], d[1], d[2]);
            }
        }
        System.out.println(); // Empty line.

        // Some info.
        System.out.println("# sigma=" + sigma);
        System.out.println("# " + numLarger + " sets filtered out");
    }

    /**
     * Creates a least-squares builder configured from the given problem:
     * model function and Jacobian, target and (diagonal) weights.
     * The start point is a zero vector of size 2; it is not used by the
     * tests but must be set to avoid a {@code NullPointerException}.
     *
     * @param problem Straight line problem supplying model, target and weights.
     * @return a builder from which the {@link LeastSquaresProblem} can be built.
     */
    LeastSquaresBuilder builder(StraightLineProblem problem) {
        return new LeastSquaresBuilder()
                .model(problem.getModelFunction(), problem.getModelFunctionJacobian())
                .target(problem.target())
                .weight(new DiagonalMatrix(problem.weight()))
                // unused start point to avoid NPE
                .start(new double[2]);
    }

    /**
     * Draws {@code numObs} points from the generator and stores them in a
     * new {@link StraightLineProblem}.
     *
     * @param generator Source of the observations.
     * @param yError Error on the observations, passed to the problem.
     * @param numObs Number of observations to generate and add.
     * @return the problem holding the generated observations.
     */
    private static StraightLineProblem createProblem(RandomStraightLinePointGenerator generator,
                                                     double yError,
                                                     int numObs) {
        final Point2D.Double[] obs = generator.generate(numObs);
        final StraightLineProblem problem = new StraightLineProblem(yError);
        for (int i = 0; i < numObs; i++) {
            final Point2D.Double p = obs[i];
            problem.addPoint(p.x, p.y);
        }
        return problem;
    }

    /**
     * Computes the square of the cost at {@code params}, divided by the
     * number of degrees of freedom (observation size minus number of
     * parameters).
     *
     * @param lsp Problem to evaluate.
     * @param params Parameters at which the problem is evaluated.
     * @return the normalized chi-square.
     */
    private double getChi2N(LeastSquaresProblem lsp,
                            RealVector params) {
        final double cost = lsp.evaluate(params).getCost();
        return cost * cost / (lsp.getObservationSize() - params.getDimension());
    }
}