1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2015 Google Inc. All rights reserved.
3 // http://ceres-solver.org/
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
8 // * Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 // this list of conditions and the following disclaimer in the documentation
12 // and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 // used to endorse or promote products derived from this software without
15 // specific prior written permission.
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
31 #ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
32 #define CERES_INTERNAL_SCHUR_ELIMINATOR_H_
36 #include "ceres/mutex.h"
37 #include "ceres/block_random_access_matrix.h"
38 #include "ceres/block_sparse_matrix.h"
39 #include "ceres/block_structure.h"
40 #include "ceres/linear_solver.h"
41 #include "ceres/internal/eigen.h"
42 #include "ceres/internal/scoped_ptr.h"
47 // Classes implementing the SchurEliminatorBase interface implement
48 // variable elimination for linear least squares problems. Assuming
49 // that the input linear system Ax = b can be partitioned into
53 // Where x = [y;z] is a partition of the variables. The paritioning
54 // of the variables is such that, E'E is a block diagonal matrix. Or
55 // in other words, the parameter blocks in E form an independent set
56 // of the of the graph implied by the block matrix A'A. Then, this
57 // class provides the functionality to compute the Schur complement
64 // S = F'F - F'E (E'E)^{-1} E'F and r = F'b - F'E(E'E)^(-1) E'b
66 // This is the Eliminate operation, i.e., construct the linear system
67 // obtained by eliminating the variables in E.
69 // The eliminator also provides the reverse functionality, i.e. given
70 // values for z it can back substitute for the values of y, by solving the
75 // which is done by observing that
77 // y = (E'E)^(-1) [E'b - E'F z]
79 // The eliminator has a number of requirements.
81 // The rows of A are ordered so that for every variable block in y,
82 // all the rows containing that variable block occur as a vertically
83 // contiguous block. i.e the matrix A looks like
86 // A = [ y1 0 0 0 | z1 0 0 0 z5] 1
87 // [ y1 0 0 0 | z1 z2 0 0 0] 1
88 // [ 0 y2 0 0 | 0 0 z3 0 0] 2
89 // [ 0 0 y3 0 | z1 z2 z3 z4 z5] 3
90 // [ 0 0 y3 0 | z1 0 0 0 z5] 3
91 // [ 0 0 0 y4 | 0 0 0 0 z5] 4
92 // [ 0 0 0 y4 | 0 z2 0 0 0] 4
93 // [ 0 0 0 y4 | 0 0 0 0 0] 4
94 // [ 0 0 0 0 | z1 0 0 0 0] non chunk blocks
95 // [ 0 0 0 0 | 0 0 z3 z4 z5] non chunk blocks
97 // This structure should be reflected in the corresponding
98 // CompressedRowBlockStructure object associated with A. The linear
99 // system Ax = b should either be well posed or the array D below
100 // should be non-null and the diagonal matrix corresponding to it
101 // should be non-singular. For simplicity of exposition only the case
102 // with a null D is described.
104 // The usual way to do the elimination is as follows. Starting with
108 // we can form the normal equations,
110 // E'E y + E'F z = E'b
111 // F'E y + F'F z = F'b
113 // multiplying both sides of the first equation by (E'E)^(-1) and then
116 // F'E y + F'E (E'E)^(-1) E'F z = F'E (E'E)^(-1) E'b
117 // F'E y + F'F z = F'b
119 // now subtracting the two equations we get
121 // [FF' - F'E (E'E)^(-1) E'F] z = F'b - F'E(E'E)^(-1) E'b
123 // Instead of forming the normal equations and operating on them as
124 // general sparse matrices, the algorithm here deals with one
125 // parameter block in y at a time. The rows corresponding to a single
126 // parameter block yi are known as a chunk, and the algorithm operates
127 // on one chunk at a time. The mathematics remains the same since the
128 // reduced linear system can be shown to be the sum of the reduced
129 // linear systems for each chunk. This can be seen by observing two
132 // 1. E'E is a block diagonal matrix.
134 // 2. When E'F is computed, only the terms within a single chunk
135 // interact, i.e for y1 column blocks when transposed and multiplied
136 // with F, the only non-zero contribution comes from the blocks in
139 // Thus, the reduced linear system
141 // FF' - F'E (E'E)^(-1) E'F
143 // can be re-written as
145 // sum_k F_k F_k' - F_k'E_k (E_k'E_k)^(-1) E_k' F_k
147 // Where the sum is over chunks and E_k'E_k is dense matrix of size y1
150 // Advanced usage. Uptil now it has been assumed that the user would
151 // be interested in all of the Schur Complement S. However, it is also
152 // possible to use this eliminator to obtain an arbitrary submatrix of
153 // the full Schur complement. When the eliminator is generating the
154 // blocks of S, it asks the RandomAccessBlockMatrix instance passed to
155 // it if it has storage for that block. If it does, the eliminator
156 // computes/updates it, if not it is skipped. This is useful when one
157 // is interested in constructing a preconditioner based on the Schur
158 // Complement, e.g., computing the block diagonal of S so that it can
159 // be used as a preconditioner for an Iterative Substructuring based
160 // solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV
161 // 2008 for an example of such use].
163 // Example usage: Please see schur_complement_solver.cc
164 class SchurEliminatorBase {
166 virtual ~SchurEliminatorBase() {}
168 // Initialize the eliminator. It is the user's responsibilty to call
169 // this function before calling Eliminate or BackSubstitute. It is
170 // also the caller's responsibilty to ensure that the
171 // CompressedRowBlockStructure object passed to this method is the
172 // same one (or is equivalent to) the one associated with the
173 // BlockSparseMatrix objects below.
175 // assume_full_rank_ete controls how the eliminator inverts with the
176 // diagonal blocks corresponding to e blocks in A'A. If
177 // assume_full_rank_ete is true, then a Cholesky factorization is
178 // used to compute the inverse, otherwise a singular value
179 // decomposition is used to compute the pseudo inverse.
180 virtual void Init(int num_eliminate_blocks,
181 bool assume_full_rank_ete,
182 const CompressedRowBlockStructure* bs) = 0;
184 // Compute the Schur complement system from the augmented linear
185 // least squares problem [A;D] x = [b;0]. The left hand side and the
186 // right hand side of the reduced linear system are returned in lhs
187 // and rhs respectively.
189 // It is the caller's responsibility to construct and initialize
190 // lhs. Depending upon the structure of the lhs object passed here,
191 // the full or a submatrix of the Schur complement will be computed.
193 // Since the Schur complement is a symmetric matrix, only the upper
194 // triangular part of the Schur complement is computed.
195 virtual void Eliminate(const BlockSparseMatrix* A,
198 BlockRandomAccessMatrix* lhs,
201 // Given values for the variables z in the F block of A, solve for
202 // the optimal values of the variables y corresponding to the E
204 virtual void BackSubstitute(const BlockSparseMatrix* A,
210 static SchurEliminatorBase* Create(const LinearSolver::Options& options);
213 // Templated implementation of the SchurEliminatorBase interface. The
214 // templating is on the sizes of the row, e and f blocks sizes in the
215 // input matrix. In many problems, the sizes of one or more of these
216 // blocks are constant, in that case, its worth passing these
217 // parameters as template arguments so that they are visible to the
218 // compiler and can be used for compile time optimization of the low
219 // level linear algebra routines.
221 // This implementation is mulithreaded using OpenMP. The level of
222 // parallelism is controlled by LinearSolver::Options::num_threads.
223 template <int kRowBlockSize = Eigen::Dynamic,
224 int kEBlockSize = Eigen::Dynamic,
225 int kFBlockSize = Eigen::Dynamic >
226 class SchurEliminator : public SchurEliminatorBase {
228 explicit SchurEliminator(const LinearSolver::Options& options)
229 : num_threads_(options.num_threads) {
232 // SchurEliminatorBase Interface
233 virtual ~SchurEliminator();
234 virtual void Init(int num_eliminate_blocks,
235 bool assume_full_rank_ete,
236 const CompressedRowBlockStructure* bs);
237 virtual void Eliminate(const BlockSparseMatrix* A,
240 BlockRandomAccessMatrix* lhs,
242 virtual void BackSubstitute(const BlockSparseMatrix* A,
249 // Chunk objects store combinatorial information needed to
250 // efficiently eliminate a whole chunk out of the least squares
251 // problem. Consider the first chunk in the example matrix above.
253 // [ y1 0 0 0 | z1 0 0 0 z5]
254 // [ y1 0 0 0 | z1 z2 0 0 0]
256 // One of the intermediate quantities that needs to be calculated is
257 // for each row the product of the y block transposed with the
258 // non-zero z block, and the sum of these blocks across rows. A
259 // temporary array "buffer_" is used for computing and storing them
260 // and the buffer_layout maps the indices of the z-blocks to
261 // position in the buffer_ array. The size of the chunk is the
262 // number of row blocks/residual blocks for the particular y block
265 // For the example chunk shown above,
269 // The entries of buffer_layout will be filled in the following order.
271 // buffer_layout[z1] = 0
272 // buffer_layout[z5] = y1 * z1
273 // buffer_layout[z2] = y1 * z1 + y1 * z5
274 typedef std::map<int, int> BufferLayoutType;
279 BufferLayoutType buffer_layout;
282 void ChunkDiagonalBlockAndGradient(
284 const BlockSparseMatrix* A,
286 int row_block_counter,
287 typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
290 BlockRandomAccessMatrix* lhs);
292 void UpdateRhs(const Chunk& chunk,
293 const BlockSparseMatrix* A,
295 int row_block_counter,
296 const double* inverse_ete_g,
299 void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
300 const Matrix& inverse_eet,
301 const double* buffer,
302 const BufferLayoutType& buffer_layout,
303 BlockRandomAccessMatrix* lhs);
304 void EBlockRowOuterProduct(const BlockSparseMatrix* A,
306 BlockRandomAccessMatrix* lhs);
309 void NoEBlockRowsUpdate(const BlockSparseMatrix* A,
311 int row_block_counter,
312 BlockRandomAccessMatrix* lhs,
315 void NoEBlockRowOuterProduct(const BlockSparseMatrix* A,
317 BlockRandomAccessMatrix* lhs);
320 int num_eliminate_blocks_;
321 bool assume_full_rank_ete_;
323 // Block layout of the columns of the reduced linear system. Since
324 // the f blocks can be of varying size, this vector stores the
325 // position of each f block in the row/col of the reduced linear
326 // system. Thus lhs_row_layout_[i] is the row/col position of the
328 std::vector<int> lhs_row_layout_;
330 // Combinatorial structure of the chunks in A. For more information
331 // see the documentation of the Chunk object above.
332 std::vector<Chunk> chunks_;
334 // TODO(sameeragarwal): The following two arrays contain per-thread
335 // storage. They should be refactored into a per thread struct.
337 // Buffer to store the products of the y and z blocks generated
338 // during the elimination phase. buffer_ is of size num_threads *
339 // buffer_size_. Each thread accesses the chunk
341 // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
343 scoped_array<double> buffer_;
345 // Buffer to store per thread matrix matrix products used by
346 // ChunkOuterProduct. Like buffer_ it is of size num_threads *
347 // buffer_size_. Each thread accesses the chunk
349 // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_ -1]
351 scoped_array<double> chunk_outer_product_buffer_;
354 int uneliminated_row_begins_;
356 // Locks for the blocks in the right hand side of the reduced linear
358 std::vector<Mutex*> rhs_locks_;
361 } // namespace internal
364 #endif // CERES_INTERNAL_SCHUR_ELIMINATOR_H_