1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2015 Google Inc. All rights reserved.
3 // http://ceres-solver.org/
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
8 // * Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 // this list of conditions and the following disclaimer in the documentation
12 // and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 // used to endorse or promote products derived from this software without
15 // specific prior written permission.
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
31 // The LossFunction interface is the way users describe how residuals
32 // are converted to cost terms for the overall problem cost function.
33 // For the exact manner in which loss functions are converted to the
34 // overall cost for a problem, see problem.h.
36 // For least squares problem where there are no outliers and standard
37 // squared loss is expected, it is not necessary to create a loss
38 // function; instead passing a NULL to the problem when adding
39 // residuals implies a standard squared loss.
41 // For least squares problems where the minimization may encounter
42 // input terms that contain outliers, that is, completely bogus
43 // measurements, it is important to use a loss function that reduces
44 // their associated penalty.
46 // Consider a structure from motion problem. The unknowns are 3D
47 // points and camera parameters, and the measurements are image
48 // coordinates describing the expected reprojected position for a
49 // point in a camera. For example, we want to model the geometry of a
50 // street scene with fire hydrants and cars, observed by a moving
51 // camera with unknown parameters, and the only 3D points we care
52 // about are the pointy tippy-tops of the fire hydrants. Our magic
53 // image processing algorithm, which is responsible for producing the
54 // measurements that are input to Ceres, has found and matched all
55 // such tippy-tops in all image frames, except that in one of the
56 // frame it mistook a car's headlight for a hydrant. If we didn't do
57 // anything special (i.e. if we used a basic quadratic loss), the
58 // residual for the erroneous measurement will result in extreme error
59 // due to the quadratic nature of squared loss. This results in the
60 // entire solution getting pulled away from the optimimum to reduce
61 // the large error that would otherwise be attributed to the wrong
64 // Using a robust loss function, the cost for large residuals is
65 // reduced. In the example above, this leads to outlier terms getting
66 // downweighted so they do not overly influence the final solution.
68 // What cost function is best?
70 // In general, there isn't a principled way to select a robust loss
71 // function. The authors suggest starting with a non-robust cost, then
72 // only experimenting with robust loss functions if standard squared
75 #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
76 #define CERES_PUBLIC_LOSS_FUNCTION_H_
78 #include "glog/logging.h"
79 #include "ceres/internal/macros.h"
80 #include "ceres/internal/scoped_ptr.h"
81 #include "ceres/types.h"
82 #include "ceres/internal/disable_warnings.h"
86 class CERES_EXPORT LossFunction {
88 virtual ~LossFunction() {}
90 // For a residual vector with squared 2-norm 'sq_norm', this method
91 // is required to fill in the value and derivatives of the loss
92 // function (rho in this example):
94 // out[0] = rho(sq_norm),
95 // out[1] = rho'(sq_norm),
96 // out[2] = rho''(sq_norm),
98 // Here the convention is that the contribution of a term to the
99 // cost function is given by 1/2 rho(s), where
101 // s = ||residuals||^2.
103 // Calling the method with a negative value of 's' is an error and
104 // the implementations are not required to handle that case.
106 // Most sane choices of rho() satisfy:
110 // rho'(s) < 1 in outlier region,
111 // rho''(s) < 0 in outlier region,
113 // so that they mimic the least squares cost for small residuals.
114 virtual void Evaluate(double sq_norm, double out[3]) const = 0;
117 // Some common implementations follow below.
119 // Note: in the region of interest (i.e. s < 3) we have:
120 // TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
123 // This corresponds to no robustification.
127 // At s = 0: rho = [0, 1, 0].
129 // It is not normally necessary to use this, as passing NULL for the
130 // loss function when building the problem accomplishes the same
132 class CERES_EXPORT TrivialLoss : public LossFunction {
134 virtual void Evaluate(double, double*) const;
139 // Given one robustifier
141 // one can change the length scale at which robustification takes
142 // place, by adding a scale factor 'a' as follows:
144 // s -> a^2 rho(s / a^2).
146 // The first and second derivatives are:
148 // s -> rho'(s / a^2),
149 // s -> (1 / a^2) rho''(s / a^2),
151 // but the behaviour near s = 0 is the same as the original function,
154 // rho(s) = s + higher order terms,
155 // a^2 rho(s / a^2) = s + higher order terms.
157 // The scalar 'a' should be positive.
159 // The reason for the appearance of squaring is that 'a' is in the
160 // units of the residual vector norm whereas 's' is a squared
161 // norm. For applications it is more convenient to specify 'a' than
162 // its square. The commonly used robustifiers below are described in
163 // un-scaled format (a = 1) but their implementations work for any
164 // non-zero value of 'a'.
168 // rho(s) = s for s <= 1,
169 // rho(s) = 2 sqrt(s) - 1 for s >= 1.
171 // At s = 0: rho = [0, 1, 0].
173 // The scaling parameter 'a' corresponds to 'delta' on this page:
174 // http://en.wikipedia.org/wiki/Huber_Loss_Function
175 class CERES_EXPORT HuberLoss : public LossFunction {
177 explicit HuberLoss(double a) : a_(a), b_(a * a) { }
178 virtual void Evaluate(double, double*) const;
186 // Soft L1, similar to Huber but smooth.
188 // rho(s) = 2 (sqrt(1 + s) - 1).
190 // At s = 0: rho = [0, 1, -1/2].
191 class CERES_EXPORT SoftLOneLoss : public LossFunction {
193 explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
194 virtual void Evaluate(double, double*) const;
203 // Inspired by the Cauchy distribution
205 // rho(s) = log(1 + s).
207 // At s = 0: rho = [0, 1, -1].
208 class CERES_EXPORT CauchyLoss : public LossFunction {
210 explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
211 virtual void Evaluate(double, double*) const;
220 // Loss that is capped beyond a certain level using the arc-tangent function.
221 // The scaling parameter 'a' determines the level where falloff occurs.
222 // For costs much smaller than 'a', the loss function is linear and behaves like
223 // TrivialLoss, and for values much larger than 'a' the value asymptotically
224 // approaches the constant value of a * PI / 2.
226 // rho(s) = a atan(s / a).
228 // At s = 0: rho = [0, 1, 0].
229 class CERES_EXPORT ArctanLoss : public LossFunction {
231 explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
232 virtual void Evaluate(double, double*) const;
240 // Loss function that maps to approximately zero cost in a range around the
241 // origin, and reverts to linear in error (quadratic in cost) beyond this range.
242 // The tolerance parameter 'a' sets the nominal point at which the
243 // transition occurs, and the transition size parameter 'b' sets the nominal
244 // distance over which most of the transition occurs. Both a and b must be
245 // greater than zero, and typically b will be set to a fraction of a.
246 // The slope rho'[s] varies smoothly from about 0 at s <= a - b to
247 // about 1 at s >= a + b.
249 // The term is computed as:
251 // rho(s) = b log(1 + exp((s - a) / b)) - c0.
253 // where c0 is chosen so that rho(0) == 0
255 // c0 = b log(1 + exp(-a / b)
257 // This has the following useful properties:
259 // rho(s) == 0 for s = 0
260 // rho'(s) ~= 0 for s << a - b
261 // rho'(s) ~= 1 for s >> a + b
262 // rho''(s) > 0 for all s
264 // In addition, all derivatives are continuous, and the curvature is
265 // concentrated in the range a - b to a + b.
267 // At s = 0: rho = [0, ~0, ~0].
268 class CERES_EXPORT TolerantLoss : public LossFunction {
270 explicit TolerantLoss(double a, double b);
271 virtual void Evaluate(double, double*) const;
274 const double a_, b_, c_;
277 // This is the Tukey biweight loss function which aggressively
278 // attempts to suppress large errors.
280 // The term is computed as:
282 // rho(s) = a^2 / 6 * (1 - (1 - s / a^2)^3 ) for s <= a^2,
283 // rho(s) = a^2 / 6 for s > a^2.
285 // At s = 0: rho = [0, 0.5, -1 / a^2]
286 class CERES_EXPORT TukeyLoss : public ceres::LossFunction {
288 explicit TukeyLoss(double a) : a_squared_(a * a) { }
289 virtual void Evaluate(double, double*) const;
292 const double a_squared_;
295 // Composition of two loss functions. The error is the result of first
296 // evaluating g followed by f to yield the composition f(g(s)).
297 // The loss functions must not be NULL.
298 class CERES_EXPORT ComposedLoss : public LossFunction {
300 explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
301 const LossFunction* g, Ownership ownership_g);
302 virtual ~ComposedLoss();
303 virtual void Evaluate(double, double*) const;
306 internal::scoped_ptr<const LossFunction> f_, g_;
307 const Ownership ownership_f_, ownership_g_;
310 // The discussion above has to do with length scaling: it affects the space
311 // in which s is measured. Sometimes you want to simply scale the output
312 // value of the robustifier. For example, you might want to weight
313 // different error terms differently (e.g., weight pixel reprojection
314 // errors differently from terrain errors).
316 // If rho is the wrapped robustifier, then this simply outputs
319 // The first and second derivatives are, not surprisingly
323 // Since we treat the a NULL Loss function as the Identity loss
324 // function, rho = NULL is a valid input and will result in the input
325 // being scaled by a. This provides a simple way of implementing a
326 // scaled ResidualBlock.
327 class CERES_EXPORT ScaledLoss : public LossFunction {
329 // Constructs a ScaledLoss wrapping another loss function. Takes
330 // ownership of the wrapped loss function or not depending on the
331 // ownership parameter.
332 ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
333 rho_(rho), a_(a), ownership_(ownership) { }
335 virtual ~ScaledLoss() {
336 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
340 virtual void Evaluate(double, double*) const;
343 internal::scoped_ptr<const LossFunction> rho_;
345 const Ownership ownership_;
346 CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
349 // Sometimes after the optimization problem has been constructed, we
350 // wish to mutate the scale of the loss function. For example, when
351 // performing estimation from data which has substantial outliers,
352 // convergence can be improved by starting out with a large scale,
353 // optimizing the problem and then reducing the scale. This can have
354 // better convergence behaviour than just using a loss function with a
357 // This templated class allows the user to implement a loss function
358 // whose scale can be mutated after an optimization problem has been
361 // Since we treat the a NULL Loss function as the Identity loss
362 // function, rho = NULL is a valid input.
368 // // Add parameter blocks
370 // CostFunction* cost_function =
371 // new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
372 // new UW_Camera_Mapper(feature_x, feature_y));
374 // LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
376 // problem.AddResidualBlock(cost_function, loss_function, parameters);
378 // Solver::Options options;
379 // Solger::Summary summary;
381 // Solve(options, &problem, &summary)
383 // loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
385 // Solve(options, &problem, &summary)
387 class CERES_EXPORT LossFunctionWrapper : public LossFunction {
389 LossFunctionWrapper(LossFunction* rho, Ownership ownership)
390 : rho_(rho), ownership_(ownership) {
393 virtual ~LossFunctionWrapper() {
394 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
399 virtual void Evaluate(double sq_norm, double out[3]) const {
400 if (rho_.get() == NULL) {
406 rho_->Evaluate(sq_norm, out);
410 void Reset(LossFunction* rho, Ownership ownership) {
411 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
415 ownership_ = ownership;
419 internal::scoped_ptr<const LossFunction> rho_;
420 Ownership ownership_;
421 CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
426 #include "ceres/internal/reenable_warnings.h"
428 #endif // CERES_PUBLIC_LOSS_FUNCTION_H_