// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_
#define CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_

#include <string>
#include <vector>

#include "ceres/internal/macros.h"
#include "ceres/internal/port.h"
#include "ceres/iteration_callback.h"
#include "ceres/types.h"
#include "ceres/internal/disable_warnings.h"

namespace ceres {

class GradientProblem;

class CERES_EXPORT GradientProblemSolver {
 public:
  virtual ~GradientProblemSolver();

  // The options structure contains, not surprisingly, options that control
  // how the solver operates. The defaults should be suitable for a wide
  // range of problems; however, better performance is often obtainable with
  // tweaking.
  //
  // The constants are defined inside types.h
  struct CERES_EXPORT Options {
    // Default constructor that sets reasonable defaults for the line search
    // minimizer.
    Options() {
      line_search_direction_type = LBFGS;
      line_search_type = WOLFE;
      nonlinear_conjugate_gradient_type = FLETCHER_REEVES;
      max_lbfgs_rank = 20;
      use_approximate_eigenvalue_bfgs_scaling = false;
      line_search_interpolation_type = CUBIC;
      min_line_search_step_size = 1e-9;
      line_search_sufficient_function_decrease = 1e-4;
      max_line_search_step_contraction = 1e-3;
      min_line_search_step_contraction = 0.6;
      max_num_line_search_step_size_iterations = 20;
      max_num_line_search_direction_restarts = 5;
      line_search_sufficient_curvature_decrease = 0.9;
      max_line_search_step_expansion = 10.0;
      max_num_iterations = 50;
      max_solver_time_in_seconds = 1e9;
      function_tolerance = 1e-6;
      gradient_tolerance = 1e-10;
      parameter_tolerance = 1e-8;
      logging_type = PER_MINIMIZER_ITERATION;
      minimizer_progress_to_stdout = false;
    }

    // Returns true if the options struct has a valid
    // configuration. Returns false otherwise, and fills in *error
    // with a message describing the problem.
    bool IsValid(std::string* error) const;
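
    // For instance, a minimal sketch of validating a configuration before
    // solving (CHECK is the glog macro used throughout Ceres):
    //
    //   GradientProblemSolver::Options options;
    //   options.max_num_iterations = 100;
    //   std::string error;
    //   CHECK(options.IsValid(&error)) << error;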

    // Minimizer options ----------------------------------------
    LineSearchDirectionType line_search_direction_type;
    LineSearchType line_search_type;
    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
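
    // For instance (illustrative), selecting a nonlinear conjugate gradient
    // direction with the Polak-Ribiere update (both enum values are defined
    // in types.h):
    //
    //   options.line_search_direction_type = NONLINEAR_CONJUGATE_GRADIENT;
    //   options.nonlinear_conjugate_gradient_type = POLAK_RIBIERE;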

    // The LBFGS hessian approximation is a low rank approximation to
    // the inverse of the Hessian matrix. The rank of the
    // approximation determines (linearly) the space and time
    // complexity of using the approximation. The higher the rank,
    // the better the quality of the approximation. The increase in
    // quality is, however, bounded for a number of reasons.
    //
    // 1. The method only uses secant information and not actual
    //    derivatives.
    // 2. The Hessian approximation is constrained to be positive
    //    definite.
    //
    // So increasing this rank to a large number will cost time and
    // space complexity without the corresponding increase in solution
    // quality. There are no hard and fast rules for choosing the
    // maximum rank. The best choice usually requires some problem
    // specific experimentation.
    //
    // For more theoretical and implementation details of the LBFGS
    // method, please see:
    //
    // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with
    // Limited Storage". Mathematics of Computation 35 (151): 773–782.
    int max_lbfgs_rank;

    // As part of the (L)BFGS update step (BFGS) / right-multiply step
    // (L-BFGS), the initial inverse Hessian approximation is taken to be the
    // Identity. However, Oren showed that using instead I * \gamma, where
    // \gamma is chosen to approximate an eigenvalue of the true inverse
    // Hessian, can result in improved convergence in a wide variety of cases.
    // Setting use_approximate_eigenvalue_bfgs_scaling to true enables this
    // scaling.
    //
    // It is important to note that approximate eigenvalue scaling does not
    // always improve convergence, and that it can in fact significantly
    // degrade performance for certain classes of problems, which is why it is
    // disabled by default. In particular, it can degrade performance when the
    // sensitivity of the problem to different parameters varies
    // significantly, as in this case a single scalar factor fails to capture
    // this variation and detrimentally downscales parts of the Jacobian
    // approximation which correspond to low-sensitivity parameters. It can
    // also reduce the robustness of the solution to errors in the Jacobians.
    //
    // Oren S.S., Self-scaling variable metric (SSVM) algorithms
    // Part II: Implementation and experiments, Management Science,
    // 20(5), 863-874, 1974.
    bool use_approximate_eigenvalue_bfgs_scaling;

    // Degree of the polynomial used to approximate the objective
    // function. Valid values are BISECTION, QUADRATIC and CUBIC.
    //
    // BISECTION corresponds to pure backtracking search with no
    // interpolation.
    LineSearchInterpolationType line_search_interpolation_type;

    // If during the line search, the step_size falls below this
    // value, it is truncated to zero.
    double min_line_search_step_size;

    // Line search parameters.

    // Solving the line search problem exactly is computationally
    // prohibitive. Fortunately, line search based optimization
    // algorithms can still guarantee convergence if, instead of an
    // exact solution, the line search algorithm returns a solution
    // which decreases the value of the objective function
    // sufficiently. More precisely, we are looking for a step_size
    // such that
    //
    //   f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
    //
    double line_search_sufficient_function_decrease;

    // In each iteration of the line search,
    //
    //   new_step_size >= max_line_search_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //   0 < max_step_contraction < min_step_contraction < 1
    //
    double max_line_search_step_contraction;

    // In each iteration of the line search,
    //
    //   new_step_size <= min_line_search_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //   0 < max_step_contraction < min_step_contraction < 1
    //
    double min_line_search_step_contraction;

    // Maximum number of trial step size iterations during each line search.
    // If a step size satisfying the search conditions cannot be found within
    // this number of trials, the line search will terminate.
    int max_num_line_search_step_size_iterations;

    // Maximum number of restarts of the line search direction algorithm
    // before terminating the optimization. Restarts of the line search
    // direction algorithm occur when the current algorithm fails to produce
    // a new descent direction. This typically indicates a numerical failure,
    // or a breakdown in the validity of the approximations used.
    int max_num_line_search_direction_restarts;

    // The strong Wolfe conditions consist of the Armijo sufficient
    // decrease condition, and an additional requirement that the
    // step size be chosen s.t. the _magnitude_ ('strong' Wolfe
    // conditions) of the gradient along the search direction
    // decreases sufficiently. Precisely, this second condition
    // is that we seek a step_size s.t.
    //
    //   |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
    //
    // where f() is the line search objective and f'() is the derivative
    // of f w.r.t. step_size (d f / d step_size).
    double line_search_sufficient_curvature_decrease;

    // During the bracketing phase of the Wolfe search, the step size is
    // increased until either a point satisfying the Wolfe conditions is
    // found, or an upper bound for a bracket containing a point satisfying
    // the conditions is found. Precisely, at each iteration of the
    // expansion:
    //
    //   new_step_size <= max_step_expansion * step_size
    //
    // By definition, for expansion, max_step_expansion > 1.0.
    double max_line_search_step_expansion;

    // Maximum number of iterations for which the minimizer should run.
    int max_num_iterations;

    // Maximum time for which the minimizer should run.
    double max_solver_time_in_seconds;

    // Minimizer terminates when
    //
    //   (new_cost - old_cost) < function_tolerance * old_cost
    //
    double function_tolerance;

    // Minimizer terminates when
    //
    //   max_i |x - Project(Plus(x, -g(x)))| < gradient_tolerance
    //
    // This value should typically be 1e-4 * function_tolerance.
    double gradient_tolerance;

    // Minimizer terminates when
    //
    //   |step|_2 <= parameter_tolerance * (|x|_2 + parameter_tolerance)
    //
    double parameter_tolerance;
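
    // For instance, a minimal sketch of tightening convergence for a
    // higher-accuracy solve (the values are illustrative, not
    // recommendations):
    //
    //   options.function_tolerance = 1e-10;
    //   options.gradient_tolerance = 1e-14;  // ~1e-4 * function_tolerance.
    //   options.parameter_tolerance = 1e-12;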

    // Logging options ---------------------------------------------------
    LoggingType logging_type;

    // By default the Minimizer progress is logged to VLOG(1), which
    // is sent to STDERR depending on the vlog level. If this flag is
    // set to true, and logging_type is not SILENT, the logging output
    // is sent to STDOUT.
    bool minimizer_progress_to_stdout;

    // Callbacks that are executed at the end of each iteration of the
    // Minimizer. An iteration may terminate midway, either due to
    // numerical failures or because one of the convergence tests has
    // been satisfied. In this case none of the callbacks are
    // executed.
    //
    // Callbacks are executed in the order that they are specified in
    // this vector. By default, parameter blocks are updated only at
    // the end of the optimization, i.e. when the Minimizer
    // terminates. This behaviour is controlled by
    // update_state_every_iteration. If the user wishes to have access
    // to the updated parameter blocks when their callbacks are
    // executed, then set update_state_every_iteration to true.
    //
    // The solver does NOT take ownership of these pointers.
    std::vector<IterationCallback*> callbacks;
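
    // For instance, a sketch of a callback that logs the cost after every
    // iteration (IterationCallback, IterationSummary and the
    // CallbackReturnType values are declared in "ceres/iteration_callback.h";
    // LoggingCallback is a hypothetical user type):
    //
    //   class LoggingCallback : public IterationCallback {
    //    public:
    //     virtual CallbackReturnType operator()(
    //         const IterationSummary& summary) {
    //       std::cout << "iteration: " << summary.iteration
    //                 << " cost: " << summary.cost << std::endl;
    //       return SOLVER_CONTINUE;
    //     }
    //   };
    //
    //   LoggingCallback logging_callback;
    //   options.callbacks.push_back(&logging_callback);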
  };

  struct CERES_EXPORT Summary {
    Summary();

    // A brief one line description of the state of the solver after
    // termination.
    std::string BriefReport() const;

    // A full multiline description of the state of the solver after
    // termination.
    std::string FullReport() const;

    bool IsSolutionUsable() const;

    // Minimizer summary -------------------------------------------------
    TerminationType termination_type;

    // Reason why the solver terminated.
    std::string message;

    // Cost of the problem (value of the objective function) before
    // the optimization.
    double initial_cost;

    // Cost of the problem (value of the objective function) after the
    // optimization.
    double final_cost;

    // IterationSummary for each minimizer iteration in order.
    std::vector<IterationSummary> iterations;

    // Number of times the cost (and not the gradient) was evaluated.
    int num_cost_evaluations;

    // Number of times the gradient (and the cost) were evaluated.
    int num_gradient_evaluations;

    // Sum total of all time spent inside Ceres when Solve is called.
    double total_time_in_seconds;

    // Time (in seconds) spent evaluating the cost.
    double cost_evaluation_time_in_seconds;

    // Time (in seconds) spent evaluating the gradient.
    double gradient_evaluation_time_in_seconds;

    // Time (in seconds) spent minimizing the interpolating polynomial
    // to compute the next candidate step size as part of a line search.
    double line_search_polynomial_minimization_time_in_seconds;

    // Number of parameters in the problem.
    int num_parameters;

    // Dimension of the tangent space of the problem.
    int num_local_parameters;

    // Type of line search direction used.
    LineSearchDirectionType line_search_direction_type;

    // Type of the line search algorithm used.
    LineSearchType line_search_type;

    // When performing line search, the degree of the polynomial used
    // to approximate the objective function.
    LineSearchInterpolationType line_search_interpolation_type;

    // If the line search direction is NONLINEAR_CONJUGATE_GRADIENT,
    // then this indicates the particular variant of non-linear
    // conjugate gradient used.
    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;

    // If the type of the line search direction is LBFGS, then this
    // indicates the rank of the Hessian approximation.
    int max_lbfgs_rank;
  };

  // Once a problem has been built, this function takes the problem
  // and optimizes it based on the values of the options parameters.
  // Upon return, a detailed summary of the work performed by the
  // minimizer is reported in the summary object.
  virtual void Solve(const GradientProblemSolver::Options& options,
                     const GradientProblem& problem,
                     double* parameters,
                     GradientProblemSolver::Summary* summary);
};

// Helper function which avoids going through the interface.
CERES_EXPORT void Solve(const GradientProblemSolver::Options& options,
                        const GradientProblem& problem,
                        double* parameters,
                        GradientProblemSolver::Summary* summary);
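
// For instance, a minimal sketch of minimizing f(x) = (x - 3)^2 with the
// helper function above. QuadraticFunction is a hypothetical user type;
// FirstOrderFunction and GradientProblem are declared in
// "ceres/gradient_problem.h", and GradientProblem takes ownership of the
// function:
//
//   class QuadraticFunction : public FirstOrderFunction {
//    public:
//     virtual bool Evaluate(const double* parameters,
//                           double* cost,
//                           double* gradient) const {
//       const double x = parameters[0];
//       cost[0] = (x - 3.0) * (x - 3.0);
//       if (gradient != NULL) {
//         gradient[0] = 2.0 * (x - 3.0);
//       }
//       return true;
//     }
//     virtual int NumParameters() const { return 1; }
//   };
//
//   double parameters[1] = {0.0};
//   GradientProblem problem(new QuadraticFunction);
//   GradientProblemSolver::Options options;
//   GradientProblemSolver::Summary summary;
//   Solve(options, problem, parameters, &summary);
//   std::cout << summary.BriefReport() << "\n";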

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_GRADIENT_PROBLEM_SOLVER_H_