include/ceres/loss_function.h

   1 // Ceres Solver - A fast non-linear least squares minimizer
   2 // Copyright 2015 Google Inc. All rights reserved.
   3 // http://ceres-solver.org/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are met:
   7 //
   8 // * Redistributions of source code must retain the above copyright notice,
   9 //   this list of conditions and the following disclaimer.
  10 // * Redistributions in binary form must reproduce the above copyright notice,
  11 //   this list of conditions and the following disclaimer in the documentation
  12 //   and/or other materials provided with the distribution.
  13 // * Neither the name of Google Inc. nor the names of its contributors may be
  14 //   used to endorse or promote products derived from this software without
  15 //   specific prior written permission.
  16 //
  17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 // POSSIBILITY OF SUCH DAMAGE.
  28 //
  29 // Author: sameeragarwal@google.com (Sameer Agarwal)
  30 //
  31 // The LossFunction interface is the way users describe how residuals
  32 // are converted to cost terms for the overall problem cost function.
  33 // For the exact manner in which loss functions are converted to the
  34 // overall cost for a problem, see problem.h.
  35 //
  36 // For least squares problem where there are no outliers and standard
  37 // squared loss is expected, it is not necessary to create a loss
  38 // function; instead passing a NULL to the problem when adding
  39 // residuals implies a standard squared loss.
  40 //
  41 // For least squares problems where the minimization may encounter
  42 // input terms that contain outliers, that is, completely bogus
  43 // measurements, it is important to use a loss function that reduces
  44 // their associated penalty.
  45 //
  46 // Consider a structure from motion problem. The unknowns are 3D
  47 // points and camera parameters, and the measurements are image
  48 // coordinates describing the expected reprojected position for a
  49 // point in a camera. For example, we want to model the geometry of a
  50 // street scene with fire hydrants and cars, observed by a moving
  51 // camera with unknown parameters, and the only 3D points we care
  52 // about are the pointy tippy-tops of the fire hydrants. Our magic
  53 // image processing algorithm, which is responsible for producing the
  54 // measurements that are input to Ceres, has found and matched all
  55 // such tippy-tops in all image frames, except that in one of the
  56 // frame it mistook a car's headlight for a hydrant. If we didn't do
  57 // anything special (i.e. if we used a basic quadratic loss), the
  58 // residual for the erroneous measurement will result in extreme error
  59 // due to the quadratic nature of squared loss. This results in the
  60 // entire solution getting pulled away from the optimimum to reduce
  61 // the large error that would otherwise be attributed to the wrong
  62 // measurement.
  63 //
  64 // Using a robust loss function, the cost for large residuals is
  65 // reduced. In the example above, this leads to outlier terms getting
  66 // downweighted so they do not overly influence the final solution.
  67 //
  68 // What cost function is best?
  69 //
  70 // In general, there isn't a principled way to select a robust loss
  71 // function. The authors suggest starting with a non-robust cost, then
  72 // only experimenting with robust loss functions if standard squared
  73 // loss doesn't work.
  74
  75 #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
  76 #define CERES_PUBLIC_LOSS_FUNCTION_H_
  77
  78 #include "glog/logging.h"
  79 #include "ceres/internal/macros.h"
  80 #include "ceres/internal/scoped_ptr.h"
  81 #include "ceres/types.h"
  82 #include "ceres/internal/disable_warnings.h"
  83
  84 namespace ceres {
  85
  86 class CERES_EXPORT LossFunction {
  87  public:
  88   virtual ~LossFunction() {}
  89
  90   // For a residual vector with squared 2-norm 'sq_norm', this method
  91   // is required to fill in the value and derivatives of the loss
  92   // function (rho in this example):
  93   //
  94   //   out[0] = rho(sq_norm),
  95   //   out[1] = rho'(sq_norm),
  96   //   out[2] = rho''(sq_norm),
  97   //
  98   // Here the convention is that the contribution of a term to the
  99   // cost function is given by 1/2 rho(s),  where
 100   //
 101   //   s = ||residuals||^2.
 102   //
 103   // Calling the method with a negative value of 's' is an error and
 104   // the implementations are not required to handle that case.
 105   //
 106   // Most sane choices of rho() satisfy:
 107   //
 108   //   rho(0) = 0,
 109   //   rho'(0) = 1,
 110   //   rho'(s) < 1 in outlier region,
 111   //   rho''(s) < 0 in outlier region,
 112   //
 113   // so that they mimic the least squares cost for small residuals.
 114   virtual void Evaluate(double sq_norm, double out[3]) const = 0;
 115 };
 116
 117 // Some common implementations follow below.
 118 //
 119 // Note: in the region of interest (i.e. s < 3) we have:
 120 //   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
 121
 122
 123 // This corresponds to no robustification.
 124 //
 125 //   rho(s) = s
 126 //
 127 // At s = 0: rho = [0, 1, 0].
 128 //
 129 // It is not normally necessary to use this, as passing NULL for the
 130 // loss function when building the problem accomplishes the same
 131 // thing.
 132 class CERES_EXPORT TrivialLoss : public LossFunction {
 133  public:
 134   virtual void Evaluate(double, double*) const;
 135 };
 136
 137 // Scaling
 138 // -------
 139 // Given one robustifier
 140 //   s -> rho(s)
 141 // one can change the length scale at which robustification takes
 142 // place, by adding a scale factor 'a' as follows:
 143 //
 144 //   s -> a^2 rho(s / a^2).
 145 //
 146 // The first and second derivatives are:
 147 //
 148 //   s -> rho'(s / a^2),
 149 //   s -> (1 / a^2) rho''(s / a^2),
 150 //
 151 // but the behaviour near s = 0 is the same as the original function,
 152 // i.e.
 153 //
 154 //   rho(s) = s + higher order terms,
 155 //   a^2 rho(s / a^2) = s + higher order terms.
 156 //
 157 // The scalar 'a' should be positive.
 158 //
 159 // The reason for the appearance of squaring is that 'a' is in the
 160 // units of the residual vector norm whereas 's' is a squared
 161 // norm. For applications it is more convenient to specify 'a' than
 162 // its square. The commonly used robustifiers below are described in
 163 // un-scaled format (a = 1) but their implementations work for any
 164 // non-zero value of 'a'.
 165
 166 // Huber.
 167 //
 168 //   rho(s) = s               for s <= 1,
 169 //   rho(s) = 2 sqrt(s) - 1   for s >= 1.
 170 //
 171 // At s = 0: rho = [0, 1, 0].
 172 //
 173 // The scaling parameter 'a' corresponds to 'delta' on this page:
 174 //   http://en.wikipedia.org/wiki/Huber_Loss_Function
 175 class CERES_EXPORT HuberLoss : public LossFunction {
 176  public:
 177   explicit HuberLoss(double a) : a_(a), b_(a * a) { }
 178   virtual void Evaluate(double, double*) const;
 179
 180  private:
 181   const double a_;
 182   // b = a^2.
 183   const double b_;
 184 };
 185
 186 // Soft L1, similar to Huber but smooth.
 187 //
 188 //   rho(s) = 2 (sqrt(1 + s) - 1).
 189 //
 190 // At s = 0: rho = [0, 1, -1/2].
 191 class CERES_EXPORT SoftLOneLoss : public LossFunction {
 192  public:
 193   explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
 194   virtual void Evaluate(double, double*) const;
 195
 196  private:
 197   // b = a^2.
 198   const double b_;
 199   // c = 1 / a^2.
 200   const double c_;
 201 };
 202
 203 // Inspired by the Cauchy distribution
 204 //
 205 //   rho(s) = log(1 + s).
 206 //
 207 // At s = 0: rho = [0, 1, -1].
 208 class CERES_EXPORT CauchyLoss : public LossFunction {
 209  public:
 210   explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
 211   virtual void Evaluate(double, double*) const;
 212
 213  private:
 214   // b = a^2.
 215   const double b_;
 216   // c = 1 / a^2.
 217   const double c_;
 218 };
 219
 220 // Loss that is capped beyond a certain level using the arc-tangent function.
 221 // The scaling parameter 'a' determines the level where falloff occurs.
 222 // For costs much smaller than 'a', the loss function is linear and behaves like
 223 // TrivialLoss, and for values much larger than 'a' the value asymptotically
 224 // approaches the constant value of a * PI / 2.
 225 //
 226 //   rho(s) = a atan(s / a).
 227 //
 228 // At s = 0: rho = [0, 1, 0].
 229 class CERES_EXPORT ArctanLoss : public LossFunction {
 230  public:
 231   explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
 232   virtual void Evaluate(double, double*) const;
 233
 234  private:
 235   const double a_;
 236   // b = 1 / a^2.
 237   const double b_;
 238 };
 239
 240 // Loss function that maps to approximately zero cost in a range around the
 241 // origin, and reverts to linear in error (quadratic in cost) beyond this range.
 242 // The tolerance parameter 'a' sets the nominal point at which the
 243 // transition occurs, and the transition size parameter 'b' sets the nominal
 244 // distance over which most of the transition occurs. Both a and b must be
 245 // greater than zero, and typically b will be set to a fraction of a.
 246 // The slope rho'[s] varies smoothly from about 0 at s <= a - b to
 247 // about 1 at s >= a + b.
 248 //
 249 // The term is computed as:
 250 //
 251 //   rho(s) = b log(1 + exp((s - a) / b)) - c0.
 252 //
 253 // where c0 is chosen so that rho(0) == 0
 254 //
 255 //   c0 = b log(1 + exp(-a / b)
 256 //
 257 // This has the following useful properties:
 258 //
 259 //   rho(s) == 0               for s = 0
 260 //   rho'(s) ~= 0              for s << a - b
 261 //   rho'(s) ~= 1              for s >> a + b
 262 //   rho''(s) > 0              for all s
 263 //
 264 // In addition, all derivatives are continuous, and the curvature is
 265 // concentrated in the range a - b to a + b.
 266 //
 267 // At s = 0: rho = [0, ~0, ~0].
 268 class CERES_EXPORT TolerantLoss : public LossFunction {
 269  public:
 270   explicit TolerantLoss(double a, double b);
 271   virtual void Evaluate(double, double*) const;
 272
 273  private:
 274   const double a_, b_, c_;
 275 };
 276
 277 // This is the Tukey biweight loss function which aggressively
 278 // attempts to suppress large errors.
 279 //
 280 // The term is computed as:
 281 //
 282 //   rho(s) = a^2 / 6 * (1 - (1 - s / a^2)^3 )   for s <= a^2,
 283 //   rho(s) = a^2 / 6                            for s >  a^2.
 284 //
 285 // At s = 0: rho = [0, 0.5, -1 / a^2]
 286 class CERES_EXPORT TukeyLoss : public ceres::LossFunction {
 287  public:
 288   explicit TukeyLoss(double a) : a_squared_(a * a) { }
 289   virtual void Evaluate(double, double*) const;
 290
 291  private:
 292   const double a_squared_;
 293 };
 294
 295 // Composition of two loss functions.  The error is the result of first
 296 // evaluating g followed by f to yield the composition f(g(s)).
 297 // The loss functions must not be NULL.
 298 class CERES_EXPORT ComposedLoss : public LossFunction {
 299  public:
 300   explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
 301                         const LossFunction* g, Ownership ownership_g);
 302   virtual ~ComposedLoss();
 303   virtual void Evaluate(double, double*) const;
 304
 305  private:
 306   internal::scoped_ptr<const LossFunction> f_, g_;
 307   const Ownership ownership_f_, ownership_g_;
 308 };
 309
 310 // The discussion above has to do with length scaling: it affects the space
 311 // in which s is measured. Sometimes you want to simply scale the output
 312 // value of the robustifier. For example, you might want to weight
 313 // different error terms differently (e.g., weight pixel reprojection
 314 // errors differently from terrain errors).
 315 //
 316 // If rho is the wrapped robustifier, then this simply outputs
 317 // s -> a * rho(s)
 318 //
 319 // The first and second derivatives are, not surprisingly
 320 // s -> a * rho'(s)
 321 // s -> a * rho''(s)
 322 //
 323 // Since we treat the a NULL Loss function as the Identity loss
 324 // function, rho = NULL is a valid input and will result in the input
 325 // being scaled by a. This provides a simple way of implementing a
 326 // scaled ResidualBlock.
 327 class CERES_EXPORT ScaledLoss : public LossFunction {
 328  public:
 329   // Constructs a ScaledLoss wrapping another loss function. Takes
 330   // ownership of the wrapped loss function or not depending on the
 331   // ownership parameter.
 332   ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
 333       rho_(rho), a_(a), ownership_(ownership) { }
 334
 335   virtual ~ScaledLoss() {
 336     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
 337       rho_.release();
 338     }
 339   }
 340   virtual void Evaluate(double, double*) const;
 341
 342  private:
 343   internal::scoped_ptr<const LossFunction> rho_;
 344   const double a_;
 345   const Ownership ownership_;
 346   CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
 347 };
 348
 349 // Sometimes after the optimization problem has been constructed, we
 350 // wish to mutate the scale of the loss function. For example, when
 351 // performing estimation from data which has substantial outliers,
 352 // convergence can be improved by starting out with a large scale,
 353 // optimizing the problem and then reducing the scale. This can have
 354 // better convergence behaviour than just using a loss function with a
 355 // small scale.
 356 //
 357 // This templated class allows the user to implement a loss function
 358 // whose scale can be mutated after an optimization problem has been
 359 // constructed.
 360 //
 361 // Since we treat the a NULL Loss function as the Identity loss
 362 // function, rho = NULL is a valid input.
 363 //
 364 // Example usage
 365 //
 366 //  Problem problem;
 367 //
 368 //  // Add parameter blocks
 369 //
 370 //  CostFunction* cost_function =
 371 //    new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
 372 //      new UW_Camera_Mapper(feature_x, feature_y));
 373 //
 374 //  LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
 375 //
 376 //  problem.AddResidualBlock(cost_function, loss_function, parameters);
 377 //
 378 //  Solver::Options options;
 379 //  Solger::Summary summary;
 380 //
 381 //  Solve(options, &problem, &summary)
 382 //
 383 //  loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
 384 //
 385 //  Solve(options, &problem, &summary)
 386 //
 387 class CERES_EXPORT LossFunctionWrapper : public LossFunction {
 388  public:
 389   LossFunctionWrapper(LossFunction* rho, Ownership ownership)
 390       : rho_(rho), ownership_(ownership) {
 391   }
 392
 393   virtual ~LossFunctionWrapper() {
 394     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
 395       rho_.release();
 396     }
 397   }
 398
 399   virtual void Evaluate(double sq_norm, double out[3]) const {
 400     if (rho_.get() == NULL) {
 401       out[0] = sq_norm;
 402       out[1] = 1.0;
 403       out[2] = 0.0;
 404     }
 405     else {
 406       rho_->Evaluate(sq_norm, out);
 407     }
 408   }
 409
 410   void Reset(LossFunction* rho, Ownership ownership) {
 411     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
 412       rho_.release();
 413     }
 414     rho_.reset(rho);
 415     ownership_ = ownership;
 416   }
 417
 418  private:
 419   internal::scoped_ptr<const LossFunction> rho_;
 420   Ownership ownership_;
 421   CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
 422 };
 423
 424 }  // namespace ceres
 425
 426 #include "ceres/internal/reenable_warnings.h"
 427
 428 #endif  // CERES_PUBLIC_LOSS_FUNCTION_H_