// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_
#define CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_

#include <string>
#include <vector>

#include "ceres/internal/macros.h"
#include "ceres/internal/port.h"
#include "ceres/iteration_callback.h"
#include "ceres/types.h"
#include "ceres/internal/disable_warnings.h"

namespace ceres {

class GradientProblem;

class CERES_EXPORT GradientProblemSolver {
 public:
  virtual ~GradientProblemSolver();

  // The options structure contains, not surprisingly, options that control
  // how the solver operates. The defaults should be suitable for a wide
  // range of problems; however, better performance is often obtainable with
  // tweaking.
  //
  // The constants are defined inside types.h
  struct CERES_EXPORT Options {
    // Default constructor that sets reasonable defaults for the line search
    // minimizer.
    Options() {
      line_search_direction_type = LBFGS;
      line_search_type = WOLFE;
      nonlinear_conjugate_gradient_type = FLETCHER_REEVES;
      max_lbfgs_rank = 20;
      use_approximate_eigenvalue_bfgs_scaling = false;
      line_search_interpolation_type = CUBIC;
      min_line_search_step_size = 1e-9;
      line_search_sufficient_function_decrease = 1e-4;
      max_line_search_step_contraction = 1e-3;
      min_line_search_step_contraction = 0.6;
      max_num_line_search_step_size_iterations = 20;
      max_num_line_search_direction_restarts = 5;
      line_search_sufficient_curvature_decrease = 0.9;
      max_line_search_step_expansion = 10.0;
      max_num_iterations = 50;
      max_solver_time_in_seconds = 1e9;
      function_tolerance = 1e-6;
      gradient_tolerance = 1e-10;
      parameter_tolerance = 1e-8;
      logging_type = PER_MINIMIZER_ITERATION;
      minimizer_progress_to_stdout = false;
    }

    // Returns true if the options struct has a valid
    // configuration. Returns false otherwise, and fills in *error
    // with a message describing the problem.
    bool IsValid(std::string* error) const;
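
    // For instance, a minimal sketch of validating a configuration before
    // solving (CHECK is the glog macro used throughout Ceres):
    //
    //   GradientProblemSolver::Options options;
    //   options.max_num_iterations = 100;
    //   std::string error;
    //   CHECK(options.IsValid(&error)) << error;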

    // Minimizer options ----------------------------------------
    LineSearchDirectionType line_search_direction_type;
    LineSearchType line_search_type;
    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
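
    // For instance (illustrative), selecting a nonlinear conjugate gradient
    // direction with the Polak-Ribiere update (both enum values are defined
    // in types.h):
    //
    //   options.line_search_direction_type = NONLINEAR_CONJUGATE_GRADIENT;
    //   options.nonlinear_conjugate_gradient_type = POLAK_RIBIERE;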

    // The LBFGS hessian approximation is a low rank approximation to
    // the inverse of the Hessian matrix. The rank of the
    // approximation determines (linearly) the space and time
    // complexity of using the approximation. The higher the rank,
    // the better the quality of the approximation. The increase in
    // quality is, however, bounded for a number of reasons.
    //
    // 1. The method only uses secant information and not actual
    //    derivatives.
    // 2. The Hessian approximation is constrained to be positive
    //    definite.
    //
    // So increasing this rank to a large number will cost time and
    // space complexity without the corresponding increase in solution
    // quality. There are no hard and fast rules for choosing the
    // maximum rank. The best choice usually requires some problem
    // specific experimentation.
    //
    // For more theoretical and implementation details of the LBFGS
    // method, please see:
    //
    // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with
    // Limited Storage". Mathematics of Computation 35 (151): 773–782.
    int max_lbfgs_rank;

    // As part of the (L)BFGS update step (BFGS) / right-multiply step
    // (L-BFGS), the initial inverse Hessian approximation is taken to be the
    // Identity. However, Oren showed that using instead I * \gamma, where
    // \gamma is chosen to approximate an eigenvalue of the true inverse
    // Hessian, can result in improved convergence in a wide variety of cases.
    // Setting use_approximate_eigenvalue_bfgs_scaling to true enables this
    // scaling.
    //
    // It is important to note that approximate eigenvalue scaling does not
    // always improve convergence, and that it can in fact significantly
    // degrade performance for certain classes of problems, which is why it is
    // disabled by default. In particular, it can degrade performance when the
    // sensitivity of the problem to different parameters varies
    // significantly, as in this case a single scalar factor fails to capture
    // this variation and detrimentally downscales parts of the Jacobian
    // approximation which correspond to low-sensitivity parameters. It can
    // also reduce the robustness of the solution to errors in the Jacobians.
    //
    // Oren S.S., Self-scaling variable metric (SSVM) algorithms
    // Part II: Implementation and experiments, Management Science,
    // 20(5), 863-874, 1974.
    bool use_approximate_eigenvalue_bfgs_scaling;

    // Degree of the polynomial used to approximate the objective
    // function. Valid values are BISECTION, QUADRATIC and CUBIC.
    //
    // BISECTION corresponds to pure backtracking search with no
    // interpolation.
    LineSearchInterpolationType line_search_interpolation_type;

    // If during the line search, the step_size falls below this
    // value, it is truncated to zero.
    double min_line_search_step_size;

    // Line search parameters.

    // Solving the line search problem exactly is computationally
    // prohibitive. Fortunately, line search based optimization
    // algorithms can still guarantee convergence if, instead of an
    // exact solution, the line search algorithm returns a solution
    // which decreases the value of the objective function
    // sufficiently. More precisely, we are looking for a step_size
    // such that
    //
    //   f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
    //
    double line_search_sufficient_function_decrease;

    // In each iteration of the line search,
    //
    //   new_step_size >= max_line_search_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //   0 < max_step_contraction < min_step_contraction < 1
    //
    double max_line_search_step_contraction;

    // In each iteration of the line search,
    //
    //   new_step_size <= min_line_search_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //   0 < max_step_contraction < min_step_contraction < 1
    //
    double min_line_search_step_contraction;

    // Maximum number of trial step size iterations during each line search.
    // If a step size satisfying the search conditions cannot be found within
    // this number of trials, the line search will terminate.
    int max_num_line_search_step_size_iterations;

    // Maximum number of restarts of the line search direction algorithm
    // before terminating the optimization. Restarts of the line search
    // direction algorithm occur when the current algorithm fails to produce
    // a new descent direction. This typically indicates a numerical failure,
    // or a breakdown in the validity of the approximations used.
    int max_num_line_search_direction_restarts;

    // The strong Wolfe conditions consist of the Armijo sufficient
    // decrease condition, and an additional requirement that the
    // step size be chosen s.t. the _magnitude_ ('strong' Wolfe
    // conditions) of the gradient along the search direction
    // decreases sufficiently. Precisely, this second condition
    // is that we seek a step_size s.t.
    //
    //   |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
    //
    // where f() is the line search objective and f'() is the derivative
    // of f w.r.t. step_size (d f / d step_size).
    double line_search_sufficient_curvature_decrease;

    // During the bracketing phase of the Wolfe search, the step size is
    // increased until either a point satisfying the Wolfe conditions is
    // found, or an upper bound for a bracket containing a point satisfying
    // the conditions is found. Precisely, at each iteration of the
    // expansion:
    //
    //   new_step_size <= max_step_expansion * step_size
    //
    // By definition, for expansion, max_step_expansion > 1.0.
    double max_line_search_step_expansion;

    // Maximum number of iterations for which the minimizer should run.
    int max_num_iterations;

    // Maximum time for which the minimizer should run.
    double max_solver_time_in_seconds;

    // Minimizer terminates when
    //
    //   (new_cost - old_cost) < function_tolerance * old_cost
    //
    double function_tolerance;

    // Minimizer terminates when
    //
    //   max_i |x - Project(Plus(x, -g(x)))| < gradient_tolerance
    //
    // This value should typically be 1e-4 * function_tolerance.
    double gradient_tolerance;

    // Minimizer terminates when
    //
    //   |step|_2 <= parameter_tolerance * (|x|_2 + parameter_tolerance)
    //
    double parameter_tolerance;
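
    // For instance, a minimal sketch of tightening convergence for a
    // higher-accuracy solve (the values are illustrative, not
    // recommendations):
    //
    //   options.function_tolerance = 1e-10;
    //   options.gradient_tolerance = 1e-14;  // ~1e-4 * function_tolerance.
    //   options.parameter_tolerance = 1e-12;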

    // Logging options ---------------------------------------------------
    LoggingType logging_type;

    // By default the Minimizer progress is logged to VLOG(1), which
    // is sent to STDERR depending on the vlog level. If this flag is
    // set to true, and logging_type is not SILENT, the logging output
    // is sent to STDOUT.
    bool minimizer_progress_to_stdout;

    // Callbacks that are executed at the end of each iteration of the
    // Minimizer. An iteration may terminate midway, either due to
    // numerical failures or because one of the convergence tests has
    // been satisfied. In this case none of the callbacks are
    // executed.
    //
    // Callbacks are executed in the order that they are specified in
    // this vector. By default, parameter blocks are updated only at
    // the end of the optimization, i.e. when the Minimizer
    // terminates. This behaviour is controlled by
    // update_state_every_iteration. If the user wishes to have access
    // to the updated parameter blocks when their callbacks are
    // executed, then set update_state_every_iteration to true.
    //
    // The solver does NOT take ownership of these pointers.
    std::vector<IterationCallback*> callbacks;
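
    // For instance, a sketch of a callback that logs the cost after every
    // iteration (IterationCallback, IterationSummary and the
    // CallbackReturnType values are declared in "ceres/iteration_callback.h";
    // LoggingCallback is a hypothetical user type):
    //
    //   class LoggingCallback : public IterationCallback {
    //    public:
    //     virtual CallbackReturnType operator()(
    //         const IterationSummary& summary) {
    //       std::cout << "iteration: " << summary.iteration
    //                 << " cost: " << summary.cost << std::endl;
    //       return SOLVER_CONTINUE;
    //     }
    //   };
    //
    //   LoggingCallback logging_callback;
    //   options.callbacks.push_back(&logging_callback);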
  };

  struct CERES_EXPORT Summary {
    Summary();

    // A brief one line description of the state of the solver after
    // termination.
    std::string BriefReport() const;

    // A full multiline description of the state of the solver after
    // termination.
    std::string FullReport() const;

    bool IsSolutionUsable() const;

    // Minimizer summary -------------------------------------------------
    TerminationType termination_type;

    // Reason why the solver terminated.
    std::string message;

    // Cost of the problem (value of the objective function) before
    // the optimization.
    double initial_cost;

    // Cost of the problem (value of the objective function) after the
    // optimization.
    double final_cost;

    // IterationSummary for each minimizer iteration in order.
    std::vector<IterationSummary> iterations;

    // Number of times the cost (and not the gradient) was evaluated.
    int num_cost_evaluations;

    // Number of times the gradient (and the cost) were evaluated.
    int num_gradient_evaluations;

    // Sum total of all time spent inside Ceres when Solve is called.
    double total_time_in_seconds;

    // Time (in seconds) spent evaluating the cost.
    double cost_evaluation_time_in_seconds;

    // Time (in seconds) spent evaluating the gradient.
    double gradient_evaluation_time_in_seconds;

    // Time (in seconds) spent minimizing the interpolating polynomial
    // to compute the next candidate step size as part of a line search.
    double line_search_polynomial_minimization_time_in_seconds;

    // Number of parameters in the problem.
    int num_parameters;

    // Dimension of the tangent space of the problem.
    int num_local_parameters;

    // Type of line search direction used.
    LineSearchDirectionType line_search_direction_type;

    // Type of the line search algorithm used.
    LineSearchType line_search_type;

    // When performing line search, the degree of the polynomial used
    // to approximate the objective function.
    LineSearchInterpolationType line_search_interpolation_type;

    // If the line search direction is NONLINEAR_CONJUGATE_GRADIENT,
    // then this indicates the particular variant of non-linear
    // conjugate gradient used.
    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;

    // If the type of the line search direction is LBFGS, then this
    // indicates the rank of the Hessian approximation.
    int max_lbfgs_rank;
  };

  // Once a problem has been built, this function takes the problem
  // and optimizes it based on the values of the options parameters.
  // Upon return, a detailed summary of the work performed by the
  // minimizer is reported in the summary object.
  virtual void Solve(const GradientProblemSolver::Options& options,
                     const GradientProblem& problem,
                     double* parameters,
                     GradientProblemSolver::Summary* summary);
};

// Helper function which avoids going through the interface.
CERES_EXPORT void Solve(const GradientProblemSolver::Options& options,
                        const GradientProblem& problem,
                        double* parameters,
                        GradientProblemSolver::Summary* summary);
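
// For instance, a minimal sketch of minimizing f(x) = (x - 3)^2 with the
// helper function above. QuadraticFunction is a hypothetical user type;
// FirstOrderFunction and GradientProblem are declared in
// "ceres/gradient_problem.h", and GradientProblem takes ownership of the
// function:
//
//   class QuadraticFunction : public FirstOrderFunction {
//    public:
//     virtual bool Evaluate(const double* parameters,
//                           double* cost,
//                           double* gradient) const {
//       const double x = parameters[0];
//       cost[0] = (x - 3.0) * (x - 3.0);
//       if (gradient != NULL) {
//         gradient[0] = 2.0 * (x - 3.0);
//       }
//       return true;
//     }
//     virtual int NumParameters() const { return 1; }
//   };
//
//   double parameters[1] = {0.0};
//   GradientProblem problem(new QuadraticFunction);
//   GradientProblemSolver::Options options;
//   GradientProblemSolver::Summary summary;
//   Solve(options, problem, parameters, &summary);
//   std::cout << summary.BriefReport() << "\n";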

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_