include/ceres/covariance.h

   1 // Ceres Solver - A fast non-linear least squares minimizer
   2 // Copyright 2015 Google Inc. All rights reserved.
   3 // http://ceres-solver.org/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are met:
   7 //
   8 // * Redistributions of source code must retain the above copyright notice,
   9 //   this list of conditions and the following disclaimer.
  10 // * Redistributions in binary form must reproduce the above copyright notice,
  11 //   this list of conditions and the following disclaimer in the documentation
  12 //   and/or other materials provided with the distribution.
  13 // * Neither the name of Google Inc. nor the names of its contributors may be
  14 //   used to endorse or promote products derived from this software without
  15 //   specific prior written permission.
  16 //
  17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 // POSSIBILITY OF SUCH DAMAGE.
  28 //
  29 // Author: sameeragarwal@google.com (Sameer Agarwal)
  30
  31 #ifndef CERES_PUBLIC_COVARIANCE_H_
  32 #define CERES_PUBLIC_COVARIANCE_H_
  33
  34 #include <utility>
  35 #include <vector>
  36 #include "ceres/internal/port.h"
  37 #include "ceres/internal/scoped_ptr.h"
  38 #include "ceres/types.h"
  39 #include "ceres/internal/disable_warnings.h"
  40
  41 namespace ceres {
  42
  43 class Problem;
  44
  45 namespace internal {
  46 class CovarianceImpl;
  47 }  // namespace internal
  48
  49 // WARNING
  50 // =======
  51 // It is very easy to use this class incorrectly without understanding
  52 // the underlying mathematics. Please read and understand the
  53 // documentation completely before attempting to use this class.
  54 //
  55 //
  56 // This class allows the user to evaluate the covariance for a
  57 // non-linear least squares problem and provides random access to its
  58 // blocks
  59 //
  60 // Background
  61 // ==========
  62 // One way to assess the quality of the solution returned by a
  63 // non-linear least squares solve is to analyze the covariance of the
  64 // solution.
  65 //
  66 // Let us consider the non-linear regression problem
  67 //
  68 //   y = f(x) + N(0, I)
  69 //
  70 // i.e., the observation y is a random non-linear function of the
  71 // independent variable x with mean f(x) and identity covariance. Then
  72 // the maximum likelihood estimate of x given observations y is the
  73 // solution to the non-linear least squares problem:
  74 //
  75 //  x* = arg min_x |f(x)|^2
  76 //
  77 // And the covariance of x* is given by
  78 //
  79 //  C(x*) = inverse[J'(x*)J(x*)]
  80 //
  81 // Here J(x*) is the Jacobian of f at x*. The above formula assumes
  82 // that J(x*) has full column rank.
  83 //
  84 // If J(x*) is rank deficient, then the covariance matrix C(x*) is
  85 // also rank deficient and is given by
  86 //
  87 //  C(x*) =  pseudoinverse[J'(x*)J(x*)]
  88 //
  89 // Note that in the above, we assumed that the covariance
  90 // matrix for y was identity. This is an important assumption. If this
  91 // is not the case and we have
  92 //
  93 //  y = f(x) + N(0, S)
  94 //
  95 // Where S is a positive semi-definite matrix denoting the covariance
  96 // of y, then the maximum likelihood problem to be solved is
  97 //
  98 //  x* = arg min_x f'(x) inverse[S] f(x)
  99 //
 100 // and the corresponding covariance estimate of x* is given by
 101 //
 102 //  C(x*) = inverse[J'(x*) inverse[S] J(x*)]
 103 //
 104 // So, if it is the case that the observations being fitted to have a
 105 // covariance matrix not equal to identity, then it is the user's
 106 // responsibility that the corresponding cost functions are correctly
 107 // scaled, e.g. in the above case the cost function for this problem
 108 // should evaluate S^{-1/2} f(x) instead of just f(x), where S^{-1/2}
 109 // is the inverse square root of the covariance matrix S.
 110 //
 111 // This class allows the user to evaluate the covariance for a
 112 // non-linear least squares problem and provides random access to its
 113 // blocks. The computation assumes that the CostFunctions compute
 114 // residuals such that their covariance is identity.
 115 //
 116 // Since the computation of the covariance matrix requires computing
 117 // the inverse of a potentially large matrix, this can involve a
 118 // rather large amount of time and memory. However, it is usually the
 119 // case that the user is only interested in a small part of the
 120 // covariance matrix. Quite often just the block diagonal. This class
 121 // allows the user to specify the parts of the covariance matrix that
 122 // she is interested in and then uses this information to only compute
 123 // and store those parts of the covariance matrix.
 124 //
 125 // Rank of the Jacobian
 126 // --------------------
 127 // As we noted above, if the jacobian is rank deficient, then the
 128 // inverse of J'J is not defined and instead a pseudo inverse needs to
 129 // be computed.
 130 //
 131 // The rank deficiency in J can be structural -- columns which are
 132 // always known to be zero or numerical -- depending on the exact
 133 // values in the Jacobian.
 134 //
 135 // Structural rank deficiency occurs when the problem contains
 136 // parameter blocks that are constant. This class correctly handles
 137 // structural rank deficiency like that.
 138 //
 139 // Numerical rank deficiency, where the rank of the matrix cannot be
 140 // predicted by its sparsity structure and requires looking at its
 141 // numerical values is more complicated. Here again there are two
 142 // cases.
 143 //
 144 //   a. The rank deficiency arises from overparameterization. e.g., a
 145 //   four dimensional quaternion used to parameterize SO(3), which is
 146 //   a three dimensional manifold. In cases like this, the user should
 147 //   use an appropriate LocalParameterization. Not only will this lead
 148 //   to better numerical behaviour of the Solver, it will also expose
 149 //   the rank deficiency to the Covariance object so that it can
 150 //   handle it correctly.
 151 //
 152 //   b. More general numerical rank deficiency in the Jacobian
 153 //   requires the computation of the so called Singular Value
 154 //   Decomposition (SVD) of J'J. We do not know how to do this for
 155 //   large sparse matrices efficiently. For small and moderate sized
 156 //   problems this is done using dense linear algebra.
 157 //
 158 // Gauge Invariance
 159 // ----------------
 160 // In structure from motion (3D reconstruction) problems, the
 161 // reconstruction is ambiguous upto a similarity transform. This is
 162 // known as a Gauge Ambiguity. Handling Gauges correctly requires the
 163 // use of SVD or custom inversion algorithms. For small problems the
 164 // user can use the dense algorithm. For more details see
 165 //
 166 // Ken-ichi Kanatani, Daniel D. Morris: Gauges and gauge
 167 // transformations for uncertainty description of geometric structure
 168 // with indeterminacy. IEEE Transactions on Information Theory 47(5):
 169 // 2017-2028 (2001)
 170 //
 171 // Example Usage
 172 // =============
 173 //
 174 //  double x[3];
 175 //  double y[2];
 176 //
 177 //  Problem problem;
 178 //  problem.AddParameterBlock(x, 3);
 179 //  problem.AddParameterBlock(y, 2);
 180 //  <Build Problem>
 181 //  <Solve Problem>
 182 //
 183 //  Covariance::Options options;
 184 //  Covariance covariance(options);
 185 //
 186 //  std::vector<std::pair<const double*, const double*> > covariance_blocks;
 187 //  covariance_blocks.push_back(make_pair(x, x));
 188 //  covariance_blocks.push_back(make_pair(y, y));
 189 //  covariance_blocks.push_back(make_pair(x, y));
 190 //
 191 //  CHECK(covariance.Compute(covariance_blocks, &problem));
 192 //
 193 //  double covariance_xx[3 * 3];
 194 //  double covariance_yy[2 * 2];
 195 //  double covariance_xy[3 * 2];
 196 //  covariance.GetCovarianceBlock(x, x, covariance_xx)
 197 //  covariance.GetCovarianceBlock(y, y, covariance_yy)
 198 //  covariance.GetCovarianceBlock(x, y, covariance_xy)
 199 //
 200 class CERES_EXPORT Covariance {
 201  public:
 202   struct CERES_EXPORT Options {
 203     Options() {
 204       algorithm_type = SPARSE_QR;
 205
 206       // Eigen's QR factorization is always available.
 207       sparse_linear_algebra_library_type = EIGEN_SPARSE;
 208 #if !defined(CERES_NO_SUITESPARSE)
 209       sparse_linear_algebra_library_type = SUITE_SPARSE;
 210 #endif
 211
 212       min_reciprocal_condition_number = 1e-14;
 213       null_space_rank = 0;
 214       num_threads = 1;
 215       apply_loss_function = true;
 216     }
 217
 218     // Sparse linear algebra library to use when a sparse matrix
 219     // factorization is being used to compute the covariance matrix.
 220     //
 221     // Currently this only applies to SPARSE_QR.
 222     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
 223
 224     // Ceres supports two different algorithms for covariance
 225     // estimation, which represent different tradeoffs in speed,
 226     // accuracy and reliability.
 227     //
 228     // 1. DENSE_SVD uses Eigen's JacobiSVD to perform the
 229     //    computations. It computes the singular value decomposition
 230     //
 231     //      U * S * V' = J
 232     //
 233     //    and then uses it to compute the pseudo inverse of J'J as
 234     //
 235     //      pseudoinverse[J'J]^ = V * pseudoinverse[S] * V'
 236     //
 237     //    It is an accurate but slow method and should only be used
 238     //    for small to moderate sized problems. It can handle
 239     //    full-rank as well as rank deficient Jacobians.
 240     //
 241     // 2. SPARSE_QR uses the sparse QR factorization algorithm
 242     //    to compute the decomposition
 243     //
 244     //      Q * R = J
 245     //
 246     //    [J'J]^-1 = [R*R']^-1
 247     //
 248     // SPARSE_QR is not capable of computing the covariance if the
 249     // Jacobian is rank deficient. Depending on the value of
 250     // Covariance::Options::sparse_linear_algebra_library_type, either
 251     // Eigen's Sparse QR factorization algorithm will be used or
 252     // SuiteSparse's high performance SuiteSparseQR algorithm will be
 253     // used.
 254     CovarianceAlgorithmType algorithm_type;
 255
 256     // If the Jacobian matrix is near singular, then inverting J'J
 257     // will result in unreliable results, e.g, if
 258     //
 259     //   J = [1.0 1.0         ]
 260     //       [1.0 1.0000001   ]
 261     //
 262     // which is essentially a rank deficient matrix, we have
 263     //
 264     //   inv(J'J) = [ 2.0471e+14  -2.0471e+14]
 265     //              [-2.0471e+14   2.0471e+14]
 266     //
 267     // This is not a useful result. Therefore, by default
 268     // Covariance::Compute will return false if a rank deficient
 269     // Jacobian is encountered. How rank deficiency is detected
 270     // depends on the algorithm being used.
 271     //
 272     // 1. DENSE_SVD
 273     //
 274     //      min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
 275     //
 276     //    where min_sigma and max_sigma are the minimum and maxiumum
 277     //    singular values of J respectively.
 278     //
 279     // 2. SPARSE_QR
 280     //
 281     //      rank(J) < num_col(J)
 282     //
 283     //   Here rank(J) is the estimate of the rank of J returned by the
 284     //   sparse QR factorization algorithm. It is a fairly reliable
 285     //   indication of rank deficiency.
 286     //
 287     double min_reciprocal_condition_number;
 288
 289     // When using DENSE_SVD, the user has more control in dealing with
 290     // singular and near singular covariance matrices.
 291     //
 292     // As mentioned above, when the covariance matrix is near
 293     // singular, instead of computing the inverse of J'J, the
 294     // Moore-Penrose pseudoinverse of J'J should be computed.
 295     //
 296     // If J'J has the eigen decomposition (lambda_i, e_i), where
 297     // lambda_i is the i^th eigenvalue and e_i is the corresponding
 298     // eigenvector, then the inverse of J'J is
 299     //
 300     //   inverse[J'J] = sum_i e_i e_i' / lambda_i
 301     //
 302     // and computing the pseudo inverse involves dropping terms from
 303     // this sum that correspond to small eigenvalues.
 304     //
 305     // How terms are dropped is controlled by
 306     // min_reciprocal_condition_number and null_space_rank.
 307     //
 308     // If null_space_rank is non-negative, then the smallest
 309     // null_space_rank eigenvalue/eigenvectors are dropped
 310     // irrespective of the magnitude of lambda_i. If the ratio of the
 311     // smallest non-zero eigenvalue to the largest eigenvalue in the
 312     // truncated matrix is still below
 313     // min_reciprocal_condition_number, then the Covariance::Compute()
 314     // will fail and return false.
 315     //
 316     // Setting null_space_rank = -1 drops all terms for which
 317     //
 318     //   lambda_i / lambda_max < min_reciprocal_condition_number.
 319     //
 320     // This option has no effect on the SUITE_SPARSE_QR and
 321     // EIGEN_SPARSE_QR algorithms.
 322     int null_space_rank;
 323
 324     int num_threads;
 325
 326     // Even though the residual blocks in the problem may contain loss
 327     // functions, setting apply_loss_function to false will turn off
 328     // the application of the loss function to the output of the cost
 329     // function and in turn its effect on the covariance.
 330     //
 331     // TODO(sameergaarwal): Expand this based on Jim's experiments.
 332     bool apply_loss_function;
 333   };
 334
 335   explicit Covariance(const Options& options);
 336   ~Covariance();
 337
 338   // Compute a part of the covariance matrix.
 339   //
 340   // The vector covariance_blocks, indexes into the covariance matrix
 341   // block-wise using pairs of parameter blocks. This allows the
 342   // covariance estimation algorithm to only compute and store these
 343   // blocks.
 344   //
 345   // Since the covariance matrix is symmetric, if the user passes
 346   // (block1, block2), then GetCovarianceBlock can be called with
 347   // block1, block2 as well as block2, block1.
 348   //
 349   // covariance_blocks cannot contain duplicates. Bad things will
 350   // happen if they do.
 351   //
 352   // Note that the list of covariance_blocks is only used to determine
 353   // what parts of the covariance matrix are computed. The full
 354   // Jacobian is used to do the computation, i.e. they do not have an
 355   // impact on what part of the Jacobian is used for computation.
 356   //
 357   // The return value indicates the success or failure of the
 358   // covariance computation. Please see the documentation for
 359   // Covariance::Options for more on the conditions under which this
 360   // function returns false.
 361   bool Compute(
 362       const std::vector<std::pair<const double*,
 363                                   const double*> >& covariance_blocks,
 364       Problem* problem);
 365
 366   // Compute a part of the covariance matrix.
 367   //
 368   // The vector parameter_blocks contains the parameter blocks that
 369   // are used for computing the covariance matrix. From this vector
 370   // all covariance pairs are generated. This allows the covariance
 371   // estimation algorithm to only compute and store these blocks.
 372   //
 373   // parameter_blocks cannot contain duplicates. Bad things will
 374   // happen if they do.
 375   //
 376   // Note that the list of covariance_blocks is only used to determine
 377   // what parts of the covariance matrix are computed. The full
 378   // Jacobian is used to do the computation, i.e. they do not have an
 379   // impact on what part of the Jacobian is used for computation.
 380   //
 381   // The return value indicates the success or failure of the
 382   // covariance computation. Please see the documentation for
 383   // Covariance::Options for more on the conditions under which this
 384   // function returns false.
 385   bool Compute(const std::vector<const double*>& parameter_blocks,
 386                Problem* problem);
 387
 388   // Return the block of the cross-covariance matrix corresponding to
 389   // parameter_block1 and parameter_block2.
 390   //
 391   // Compute must be called before the first call to
 392   // GetCovarianceBlock and the pair <parameter_block1,
 393   // parameter_block2> OR the pair <parameter_block2,
 394   // parameter_block1> must have been present in the vector
 395   // covariance_blocks when Compute was called. Otherwise
 396   // GetCovarianceBlock will return false.
 397   //
 398   // covariance_block must point to a memory location that can store a
 399   // parameter_block1_size x parameter_block2_size matrix. The
 400   // returned covariance will be a row-major matrix.
 401   bool GetCovarianceBlock(const double* parameter_block1,
 402                           const double* parameter_block2,
 403                           double* covariance_block) const;
 404
 405   // Return the block of the cross-covariance matrix corresponding to
 406   // parameter_block1 and parameter_block2.
 407   // Returns cross-covariance in the tangent space if a local
 408   // parameterization is associated with either parameter block;
 409   // else returns cross-covariance in the ambient space.
 410   //
 411   // Compute must be called before the first call to
 412   // GetCovarianceBlock and the pair <parameter_block1,
 413   // parameter_block2> OR the pair <parameter_block2,
 414   // parameter_block1> must have been present in the vector
 415   // covariance_blocks when Compute was called. Otherwise
 416   // GetCovarianceBlock will return false.
 417   //
 418   // covariance_block must point to a memory location that can store a
 419   // parameter_block1_local_size x parameter_block2_local_size matrix. The
 420   // returned covariance will be a row-major matrix.
 421   bool GetCovarianceBlockInTangentSpace(const double* parameter_block1,
 422                                         const double* parameter_block2,
 423                                         double* covariance_block) const;
 424
 425   // Return the covariance matrix corresponding to all parameter_blocks.
 426   //
 427   // Compute must be called before calling GetCovarianceMatrix and all
 428   // parameter_blocks must have been present in the vector
 429   // parameter_blocks when Compute was called. Otherwise
 430   // GetCovarianceMatrix returns false.
 431   //
 432   // covariance_matrix must point to a memory location that can store
 433   // the size of the covariance matrix. The covariance matrix will be
 434   // a square matrix whose row and column count is equal to the sum of
 435   // the sizes of the individual parameter blocks. The covariance
 436   // matrix will be a row-major matrix.
 437   bool GetCovarianceMatrix(const std::vector<const double *> &parameter_blocks,
 438                            double *covariance_matrix);
 439
 440   // Return the covariance matrix corresponding to parameter_blocks
 441   // in the tangent space if a local parameterization is associated
 442   // with one of the parameter blocks else returns the covariance
 443   // matrix in the ambient space.
 444   //
 445   // Compute must be called before calling GetCovarianceMatrix and all
 446   // parameter_blocks must have been present in the vector
 447   // parameters_blocks when Compute was called. Otherwise
 448   // GetCovarianceMatrix returns false.
 449   //
 450   // covariance_matrix must point to a memory location that can store
 451   // the size of the covariance matrix. The covariance matrix will be
 452   // a square matrix whose row and column count is equal to the sum of
 453   // the sizes of the tangent spaces of the individual parameter
 454   // blocks. The covariance matrix will be a row-major matrix.
 455   bool GetCovarianceMatrixInTangentSpace(
 456       const std::vector<const double*>& parameter_blocks,
 457       double* covariance_matrix);
 458
 459  private:
 460   internal::scoped_ptr<internal::CovarianceImpl> impl_;
 461 };
 462
 463 }  // namespace ceres
 464
 465 #include "ceres/internal/reenable_warnings.h"
 466
 467 #endif  // CERES_PUBLIC_COVARIANCE_H_