1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
29 solve A*x = b+w, with x and w subject to certain LCP conditions.
30 each x(i),w(i) must lie on one of the three line segments in the following
31 diagram. each line segment corresponds to one index set :
40 w=0 + +-----------------------+
47 +-------|-----------|-----------|----------> x(i)
50 the Dantzig algorithm proceeds as follows:
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60 simpler, because the starting point for x(i),w(i) is always on the dotted
61 line x=0 and x will only ever increase in one direction, so it can only hit
62 two out of the three line segments.
68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69 the implementation is split into an LCP problem object (btLCP) and an LCP
70 driver function. most optimization occurs in the btLCP object.
72 a naive implementation of the algorithm requires either a lot of data motion
73 or a lot of permutation-array lookup, because we are constantly re-ordering
74 rows and columns. to avoid this and make a more optimized algorithm, a
75 non-trivial data structure is used to represent the matrix A (this is
76 implemented in the fast version of the btLCP object).
78 during execution of this algorithm, some indexes in A are clamped (set C),
79 some are non-clamped (set N), and some are "don't care" (where x=0).
80 A,x,b,w (and other problem vectors) are permuted such that the clamped
81 indexes are first, the unclamped indexes are next, and the don't-care
82 indexes are last. this permutation is recorded in the array `p'.
83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84 the corresponding elements of p are swapped.
86 because the C and N elements are grouped together in the rows of A, we can do
87 lots of work with a fast dot product function. if A,x,etc were not permuted
88 and we only had a permutation array, then those dot products would be much
89 slower as we would have a permutation array lookup in some inner loops.
91 A is accessed through an array of row pointers, so that element (i,j) of the
92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93 we still have to actually move the data.
95 during execution of this algorithm we maintain an L*D*L' factorization of
96 the clamped submatrix of A (call it `AC') which is the top left nC*nC
97 submatrix of A. there are two ways we could arrange the rows/columns in AC.
99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem
100 when a row/column is removed from C, because then all the rows/columns of A
101 between the deleted index and the end of C need to be rotated downward.
102 this results in a lot of data motion and slows things down.
103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104 itself a permutation of the underlying A). this is what we do - the
105 permutation is recorded in the vector C. call this permutation A[C,C].
106 when a row/column is removed from C, all we have to do is swap two
107 rows/columns and manipulate C.
111 #include "btDantzigLCP.h"
113 #include <string.h> //memcpy
115 bool s_error = false;
117 //***************************************************************************
118 // code generation parameters
120 #define btLCP_FAST // use fast btLCP object
122 // option 1 : matrix row pointers (less data copying)
124 #define BTATYPE btScalar **
125 #define BTAROW(i) (m_A[i])
127 // option 2 : no matrix row pointers (slightly faster inner loops)
129 //#define BTATYPE btScalar *
130 //#define BTAROW(i) (m_A+(i)*m_nskip)
132 #define BTNUB_OPTIMIZATIONS
134 /* solve L*X=B, with B containing 1 right hand sides.
135 * L is an n*n lower triangular matrix with ones on the diagonal.
136 * L is stored by rows and its leading dimension is lskip.
137 * B is an n*1 matrix that contains the right hand sides.
138 * B is stored by columns and its leading dimension is also lskip.
139 * B is overwritten with X.
140 * this processes blocks of 2*2.
141 * if this is in the factorizer source file, n must be a multiple of 2.
144 static void btSolveL1_1(const btScalar *L, btScalar *B, int n, int lskip1)
146 /* declare variables - Z matrix, p and q vectors, etc */
147 btScalar Z11, m11, Z21, m21, p1, q1, p2, *ex;
150 /* compute all 2 x 1 blocks of X */
151 for (i = 0; i < n; i += 2)
153 /* compute all 2 x 1 block of X, from rows i..i+2-1 */
154 /* set the Z matrix to 0 */
157 ell = L + i * lskip1;
159 /* the inner loop that computes outer products and adds them to Z */
160 for (j = i - 2; j >= 0; j -= 2)
162 /* compute outer product and add it to the Z matrix */
170 /* compute outer product and add it to the Z matrix */
174 p2 = ell[1 + lskip1];
176 /* advance pointers */
181 /* end of inner loop */
183 /* compute left-over iterations */
187 /* compute outer product and add it to the Z matrix */
193 /* advance pointers */
199 /* finish computing the X(i) block */
203 Z21 = ex[1] - Z21 - p1 * Z11;
205 /* end of outer loop */
209 /* solve L*X=B, with B containing 2 right hand sides.
210 * L is an n*n lower triangular matrix with ones on the diagonal.
211 * L is stored by rows and its leading dimension is lskip.
212 * B is an n*2 matrix that contains the right hand sides.
213 * B is stored by columns and its leading dimension is also lskip.
214 * B is overwritten with X.
215 * this processes blocks of 2*2.
216 * if this is in the factorizer source file, n must be a multiple of 2.
219 static void btSolveL1_2(const btScalar *L, btScalar *B, int n, int lskip1)
221 /* declare variables - Z matrix, p and q vectors, etc */
222 btScalar Z11, m11, Z12, m12, Z21, m21, Z22, m22, p1, q1, p2, q2, *ex;
225 /* compute all 2 x 2 blocks of X */
226 for (i = 0; i < n; i += 2)
228 /* compute all 2 x 2 block of X, from rows i..i+2-1 */
229 /* set the Z matrix to 0 */
234 ell = L + i * lskip1;
236 /* the inner loop that computes outer products and adds them to Z */
237 for (j = i - 2; j >= 0; j -= 2)
239 /* compute outer product and add it to the Z matrix */
252 /* compute outer product and add it to the Z matrix */
258 p2 = ell[1 + lskip1];
261 /* advance pointers */
268 /* end of inner loop */
270 /* compute left-over iterations */
274 /* compute outer product and add it to the Z matrix */
283 /* advance pointers */
291 /* finish computing the X(i) block */
294 Z12 = ex[lskip1] - Z12;
297 Z21 = ex[1] - Z21 - p1 * Z11;
299 Z22 = ex[1 + lskip1] - Z22 - p1 * Z12;
300 ex[1 + lskip1] = Z22;
301 /* end of outer loop */
305 void btFactorLDLT(btScalar *A, btScalar *d, int n, int nskip1)
308 btScalar sum, *ell, *dee, dd, p1, p2, q1, q2, Z11, m11, Z21, m21, Z22, m22;
311 for (i = 0; i <= n - 2; i += 2)
313 /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
314 btSolveL1_2(A, A + i * nskip1, i, nskip1);
315 /* scale the elements in a 2 x i block at A(i,0), and also */
316 /* compute Z = the outer product matrix that we'll need. */
320 ell = A + i * nskip1;
322 for (j = i - 6; j >= 0; j -= 6)
338 p2 = ell[1 + nskip1];
343 ell[1 + nskip1] = q2;
351 p2 = ell[2 + nskip1];
356 ell[2 + nskip1] = q2;
364 p2 = ell[3 + nskip1];
369 ell[3 + nskip1] = q2;
377 p2 = ell[4 + nskip1];
382 ell[4 + nskip1] = q2;
390 p2 = ell[5 + nskip1];
395 ell[5 + nskip1] = q2;
405 /* compute left-over iterations */
425 /* solve for diagonal 2 x 2 block at A(i,i) */
427 Z21 = ell[nskip1] - Z21;
428 Z22 = ell[1 + nskip1] - Z22;
430 /* factorize 2 x 2 block Z,dee */
431 /* factorize row 1 */
432 dee[0] = btRecip(Z11);
433 /* factorize row 2 */
439 dee[1] = btRecip(Z22 - sum);
440 /* done factorizing 2 x 2 block */
443 /* compute the (less than 2) rows at the bottom */
450 btSolveL1_1(A, A + i * nskip1, i, nskip1);
451 /* scale the elements in a 1 x i block at A(i,0), and also */
452 /* compute Z = the outer product matrix that we'll need. */
454 ell = A + i * nskip1;
456 for (j = i - 6; j >= 0; j -= 6)
497 /* compute left-over iterations */
510 /* solve for diagonal 1 x 1 block at A(i,i) */
513 /* factorize 1 x 1 block Z,dee */
514 /* factorize row 1 */
515 dee[0] = btRecip(Z11);
516 /* done factorizing 1 x 1 block */
519 //default: *((char*)0)=0; /* this should never happen! */
523 /* solve L*X=B, with B containing 1 right hand sides.
524 * L is an n*n lower triangular matrix with ones on the diagonal.
525 * L is stored by rows and its leading dimension is lskip.
526 * B is an n*1 matrix that contains the right hand sides.
527 * B is stored by columns and its leading dimension is also lskip.
528 * B is overwritten with X.
529 * this processes blocks of 4*4.
530 * if this is in the factorizer source file, n must be a multiple of 4.
533 void btSolveL1(const btScalar *L, btScalar *B, int n, int lskip1)
535 /* declare variables - Z matrix, p and q vectors, etc */
536 btScalar Z11, Z21, Z31, Z41, p1, q1, p2, p3, p4, *ex;
538 int lskip2, lskip3, i, j;
539 /* compute lskip values */
542 /* compute all 4 x 1 blocks of X */
543 for (i = 0; i <= n - 4; i += 4)
545 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
546 /* set the Z matrix to 0 */
551 ell = L + i * lskip1;
553 /* the inner loop that computes outer products and adds them to Z */
554 for (j = i - 12; j >= 0; j -= 12)
556 /* load p and q values */
562 /* compute outer product and add it to the Z matrix */
567 /* load p and q values */
570 p2 = ell[1 + lskip1];
571 p3 = ell[1 + lskip2];
572 p4 = ell[1 + lskip3];
573 /* compute outer product and add it to the Z matrix */
578 /* load p and q values */
581 p2 = ell[2 + lskip1];
582 p3 = ell[2 + lskip2];
583 p4 = ell[2 + lskip3];
584 /* compute outer product and add it to the Z matrix */
589 /* load p and q values */
592 p2 = ell[3 + lskip1];
593 p3 = ell[3 + lskip2];
594 p4 = ell[3 + lskip3];
595 /* compute outer product and add it to the Z matrix */
600 /* load p and q values */
603 p2 = ell[4 + lskip1];
604 p3 = ell[4 + lskip2];
605 p4 = ell[4 + lskip3];
606 /* compute outer product and add it to the Z matrix */
611 /* load p and q values */
614 p2 = ell[5 + lskip1];
615 p3 = ell[5 + lskip2];
616 p4 = ell[5 + lskip3];
617 /* compute outer product and add it to the Z matrix */
622 /* load p and q values */
625 p2 = ell[6 + lskip1];
626 p3 = ell[6 + lskip2];
627 p4 = ell[6 + lskip3];
628 /* compute outer product and add it to the Z matrix */
633 /* load p and q values */
636 p2 = ell[7 + lskip1];
637 p3 = ell[7 + lskip2];
638 p4 = ell[7 + lskip3];
639 /* compute outer product and add it to the Z matrix */
644 /* load p and q values */
647 p2 = ell[8 + lskip1];
648 p3 = ell[8 + lskip2];
649 p4 = ell[8 + lskip3];
650 /* compute outer product and add it to the Z matrix */
655 /* load p and q values */
658 p2 = ell[9 + lskip1];
659 p3 = ell[9 + lskip2];
660 p4 = ell[9 + lskip3];
661 /* compute outer product and add it to the Z matrix */
666 /* load p and q values */
669 p2 = ell[10 + lskip1];
670 p3 = ell[10 + lskip2];
671 p4 = ell[10 + lskip3];
672 /* compute outer product and add it to the Z matrix */
677 /* load p and q values */
680 p2 = ell[11 + lskip1];
681 p3 = ell[11 + lskip2];
682 p4 = ell[11 + lskip3];
683 /* compute outer product and add it to the Z matrix */
688 /* advance pointers */
691 /* end of inner loop */
693 /* compute left-over iterations */
697 /* load p and q values */
703 /* compute outer product and add it to the Z matrix */
708 /* advance pointers */
712 /* finish computing the X(i) block */
716 Z21 = ex[1] - Z21 - p1 * Z11;
719 p2 = ell[1 + lskip2];
720 Z31 = ex[2] - Z31 - p1 * Z11 - p2 * Z21;
723 p2 = ell[1 + lskip3];
724 p3 = ell[2 + lskip3];
725 Z41 = ex[3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
727 /* end of outer loop */
729 /* compute rows at end that are not a multiple of block size */
732 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
733 /* set the Z matrix to 0 */
735 ell = L + i * lskip1;
737 /* the inner loop that computes outer products and adds them to Z */
738 for (j = i - 12; j >= 0; j -= 12)
740 /* load p and q values */
743 /* compute outer product and add it to the Z matrix */
745 /* load p and q values */
748 /* compute outer product and add it to the Z matrix */
750 /* load p and q values */
753 /* compute outer product and add it to the Z matrix */
755 /* load p and q values */
758 /* compute outer product and add it to the Z matrix */
760 /* load p and q values */
763 /* compute outer product and add it to the Z matrix */
765 /* load p and q values */
768 /* compute outer product and add it to the Z matrix */
770 /* load p and q values */
773 /* compute outer product and add it to the Z matrix */
775 /* load p and q values */
778 /* compute outer product and add it to the Z matrix */
780 /* load p and q values */
783 /* compute outer product and add it to the Z matrix */
785 /* load p and q values */
788 /* compute outer product and add it to the Z matrix */
790 /* load p and q values */
793 /* compute outer product and add it to the Z matrix */
795 /* load p and q values */
798 /* compute outer product and add it to the Z matrix */
800 /* advance pointers */
803 /* end of inner loop */
805 /* compute left-over iterations */
809 /* load p and q values */
812 /* compute outer product and add it to the Z matrix */
814 /* advance pointers */
818 /* finish computing the X(i) block */
824 /* solve L^T * x=b, with b containing 1 right hand side.
825 * L is an n*n lower triangular matrix with ones on the diagonal.
826 * L is stored by rows and its leading dimension is lskip.
827 * b is an n*1 matrix that contains the right hand side.
828 * b is overwritten with x.
829 * this processes blocks of 4.
832 void btSolveL1T(const btScalar *L, btScalar *B, int n, int lskip1)
834 /* declare variables - Z matrix, p and q vectors, etc */
835 btScalar Z11, m11, Z21, m21, Z31, m31, Z41, m41, p1, q1, p2, p3, p4, *ex;
839 /* special handling for L and B because we're solving L1 *transpose* */
840 L = L + (n - 1) * (lskip1 + 1);
843 /* compute lskip values */
846 /* compute all 4 x 1 blocks of X */
847 for (i = 0; i <= n - 4; i += 4)
849 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
850 /* set the Z matrix to 0 */
857 /* the inner loop that computes outer products and adds them to Z */
858 for (j = i - 4; j >= 0; j -= 4)
860 /* load p and q values */
866 /* compute outer product and add it to the Z matrix */
876 /* load p and q values */
882 /* compute outer product and add it to the Z matrix */
892 /* load p and q values */
898 /* compute outer product and add it to the Z matrix */
908 /* load p and q values */
914 /* compute outer product and add it to the Z matrix */
925 /* end of inner loop */
927 /* compute left-over iterations */
931 /* load p and q values */
937 /* compute outer product and add it to the Z matrix */
949 /* finish computing the X(i) block */
953 Z21 = ex[-1] - Z21 - p1 * Z11;
956 p2 = ell[-2 + lskip1];
957 Z31 = ex[-2] - Z31 - p1 * Z11 - p2 * Z21;
960 p2 = ell[-3 + lskip1];
961 p3 = ell[-3 + lskip2];
962 Z41 = ex[-3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
964 /* end of outer loop */
966 /* compute rows at end that are not a multiple of block size */
969 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
970 /* set the Z matrix to 0 */
974 /* the inner loop that computes outer products and adds them to Z */
975 for (j = i - 4; j >= 0; j -= 4)
977 /* load p and q values */
980 /* compute outer product and add it to the Z matrix */
984 /* load p and q values */
987 /* compute outer product and add it to the Z matrix */
991 /* load p and q values */
994 /* compute outer product and add it to the Z matrix */
998 /* load p and q values */
1001 /* compute outer product and add it to the Z matrix */
1006 /* end of inner loop */
1008 /* compute left-over iterations */
1012 /* load p and q values */
1015 /* compute outer product and add it to the Z matrix */
1021 /* finish computing the X(i) block */
1027 void btVectorScale(btScalar *a, const btScalar *d, int n)
1029 btAssert(a && d && n >= 0);
1030 for (int i = 0; i < n; i++)
1036 void btSolveLDLT(const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
1038 btAssert(L && d && b && n > 0 && nskip >= n);
1039 btSolveL1(L, b, n, nskip);
1040 btVectorScale(b, d, n);
1041 btSolveL1T(L, b, n, nskip);
1044 //***************************************************************************
1046 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
1047 // A is nskip. this only references and swaps the lower triangle.
1048 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then
1049 // rows will be swapped by exchanging row pointers. otherwise the data will
1052 static void btSwapRowsAndCols(BTATYPE A, int n, int i1, int i2, int nskip,
1053 int do_fast_row_swaps)
1055 btAssert(A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
1056 nskip >= n && i1 < i2);
1059 btScalar *A_i1 = A[i1];
1060 btScalar *A_i2 = A[i2];
1061 for (int i = i1 + 1; i < i2; ++i)
1063 btScalar *A_i_i1 = A[i] + i1;
1067 A_i1[i2] = A_i1[i1];
1068 A_i1[i1] = A_i2[i1];
1069 A_i2[i1] = A_i2[i2];
1070 // swap rows, by swapping row pointers
1071 if (do_fast_row_swaps)
1078 // Only swap till i2 column to match A plain storage variant.
1079 for (int k = 0; k <= i2; ++k)
1081 btScalar tmp = A_i1[k];
1086 // swap columns the hard way
1087 for (int j = i2 + 1; j < n; ++j)
1089 btScalar *A_j = A[j];
1090 btScalar tmp = A_j[i1];
1095 btScalar *A_i1 = A + i1 * nskip;
1096 btScalar *A_i2 = A + i2 * nskip;
1097 for (int k = 0; k < i1; ++k)
1099 btScalar tmp = A_i1[k];
1103 btScalar *A_i = A_i1 + nskip;
1104 for (int i = i1 + 1; i < i2; A_i += nskip, ++i)
1106 btScalar tmp = A_i2[i];
1111 btScalar tmp = A_i1[i1];
1112 A_i1[i1] = A_i2[i2];
1115 btScalar *A_j = A_i2 + nskip;
1116 for (int j = i2 + 1; j < n; A_j += nskip, ++j)
1118 btScalar tmp = A_j[i1];
1125 // swap two indexes in the n*n LCP problem. i1 must be <= i2.
1127 static void btSwapProblem(BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo,
1128 btScalar *hi, int *p, bool *state, int *findex,
1129 int n, int i1, int i2, int nskip,
1130 int do_fast_row_swaps)
1135 btAssert(n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
1136 if (i1 == i2) return;
1138 btSwapRowsAndCols(A, n, i1, i2, nskip, do_fast_row_swaps);
1165 state[i1] = state[i2];
1171 findex[i1] = findex[i2];
1176 //***************************************************************************
1177 // btLCP manipulator object. this represents an n*n LCP problem.
1179 // two index sets C and N are kept. each set holds a subset of
1180 // the variable indexes 0..n-1. an index can only be in one set.
1181 // initially both sets are empty.
1183 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
1185 //***************************************************************************
1186 // fast implementation of btLCP. see the above definition of btLCP for
1187 // interface comments.
1189 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
1190 // permuted as the other vectors/matrices are permuted.
1192 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
1193 // contiguous indexes. the don't-care indexes follow N.
1195 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
1196 // added or removed from the set C the factorization is updated.
1197 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
1198 // the leading dimension of the matrix L is always `nskip'.
1200 // at the start there may be other indexes that are unbounded but are not
1201 // included in `nub'. btLCP will permute the matrix so that absolutely all
1202 // unbounded vectors are at the start. thus there may be some initial
1205 // the algorithms here assume certain patterns, particularly with respect to
1215 int m_nC, m_nN; // size of each index set
1216 BTATYPE const m_A; // A rows
1217 btScalar *const m_x, *const m_b, *const m_w, *const m_lo, *const m_hi; // permuted LCP problem data
1218 btScalar *const m_L, *const m_d; // L*D*L' factorization of set C
1219 btScalar *const m_Dell, *const m_ell, *const m_tmp;
1220 bool *const m_state;
1221 int *const m_findex, *const m_p, *const m_C;
1223 btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1224 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1225 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1226 bool *_state, int *_findex, int *p, int *c, btScalar **Arows);
1227 int getNub() const { return m_nub; }
1228 void transfer_i_to_C(int i);
1229 void transfer_i_to_N(int i) { m_nN++; } // because we can assume C and N span 1:i-1
1230 void transfer_i_from_N_to_C(int i);
1231 void transfer_i_from_C_to_N(int i, btAlignedObjectArray<btScalar> &scratch);
1232 int numC() const { return m_nC; }
1233 int numN() const { return m_nN; }
1234 int indexC(int i) const { return i; }
1235 int indexN(int i) const { return i + m_nC; }
1236 btScalar Aii(int i) const { return BTAROW(i)[i]; }
1237 btScalar AiC_times_qC(int i, btScalar *q) const { return btLargeDot(BTAROW(i), q, m_nC); }
1238 btScalar AiN_times_qN(int i, btScalar *q) const { return btLargeDot(BTAROW(i) + m_nC, q + m_nC, m_nN); }
1239 void pN_equals_ANC_times_qC(btScalar *p, btScalar *q);
1240 void pN_plusequals_ANi(btScalar *p, int i, int sign = 1);
1241 void pC_plusequals_s_times_qC(btScalar *p, btScalar s, btScalar *q);
1242 void pN_plusequals_s_times_qN(btScalar *p, btScalar s, btScalar *q);
1243 void solve1(btScalar *a, int i, int dir = 1, int only_transfer = 0);
1247 btLCP::btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1248 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1249 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1250 bool *_state, int *_findex, int *p, int *c, btScalar **Arows) : m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
1272 btSetZero(m_x, m_n);
1277 // make matrix row pointers
1278 btScalar *aptr = _Adata;
1280 const int n = m_n, nskip = m_nskip;
1281 for (int k = 0; k < n; aptr += nskip, ++k) A[k] = aptr;
1288 for (int k = 0; k < n; ++k) p[k] = k; // initially unpermuted
1292 // for testing, we can do some random swaps in the area i > nub
1295 const int nub = m_nub;
1297 for (int k=0; k<100; k++) {
1300 i1 = dRandInt(n-nub)+nub;
1301 i2 = dRandInt(n-nub)+nub;
1304 //printf ("--> %d %d\n",i1,i2);
1305 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0);
1310 // permute the problem so that *all* the unbounded variables are at the
1311 // start, i.e. look for unbounded variables not included in `nub'. we can
1312 // potentially push up `nub' this way and get a bigger initial factorization.
1313 // note that when we swap rows/cols here we must not just swap row pointers,
1314 // as the initial factorization relies on the data being all in one chunk.
1315 // variables that have findex >= 0 are *not* considered to be unbounded even
1316 // if lo=-inf and hi=inf - this is because these limits may change during the
1317 // solution process.
1320 int *findex = m_findex;
1321 btScalar *lo = m_lo, *hi = m_hi;
1323 for (int k = m_nub; k < n; ++k)
1325 if (findex && findex[k] >= 0) continue;
1326 if (lo[k] == -BT_INFINITY && hi[k] == BT_INFINITY)
1328 btSwapProblem(m_A, m_x, m_b, m_w, lo, hi, m_p, m_state, findex, n, m_nub, k, m_nskip, 0);
1334 // if there are unbounded variables at the start, factorize A up to that
1335 // point and solve for x. this puts all indexes 0..nub-1 into C.
1338 const int nub = m_nub;
1340 btScalar *Lrow = m_L;
1341 const int nskip = m_nskip;
1342 for (int j = 0; j < nub; Lrow += nskip, ++j) memcpy(Lrow, BTAROW(j), (j + 1) * sizeof(btScalar));
1344 btFactorLDLT(m_L, m_d, nub, m_nskip);
1345 memcpy(m_x, m_b, nub * sizeof(btScalar));
1346 btSolveLDLT(m_L, m_d, m_x, nub, m_nskip);
1347 btSetZero(m_w, nub);
1350 for (int k = 0; k < nub; ++k) C[k] = k;
1355 // permute the indexes > nub such that all findex variables are at the end
1358 const int nub = m_nub;
1359 int *findex = m_findex;
1361 for (int k = m_n - 1; k >= nub; k--)
1365 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, findex, m_n, k, m_n - 1 - num_at_end, m_nskip, 1);
1371 // print info about indexes
1375 const int nub = m_nub;
1376 for (int k=0; k<n; k++) {
1377 if (k<nub) printf ("C");
1378 else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c");
1386 void btLCP::transfer_i_to_C(int i)
1391 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
1393 const int nC = m_nC;
1394 btScalar *const Ltgt = m_L + nC * m_nskip, *ell = m_ell;
1395 for (int j = 0; j < nC; ++j) Ltgt[j] = ell[j];
1397 const int nC = m_nC;
1398 m_d[nC] = btRecip(BTAROW(i)[i] - btLargeDot(m_ell, m_Dell, nC));
1402 m_d[0] = btRecip(BTAROW(i)[i]);
1405 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, m_findex, m_n, m_nC, i, m_nskip, 1);
1407 const int nC = m_nC;
1409 m_nC = nC + 1; // nC value is outdated after this line
1413 void btLCP::transfer_i_from_N_to_C(int i)
1419 btScalar *const aptr = BTAROW(i);
1420 btScalar *Dell = m_Dell;
1422 #ifdef BTNUB_OPTIMIZATIONS
1423 // if nub>0, initial part of aptr unpermuted
1424 const int nub = m_nub;
1426 for (; j < nub; ++j) Dell[j] = aptr[j];
1427 const int nC = m_nC;
1428 for (; j < nC; ++j) Dell[j] = aptr[C[j]];
1430 const int nC = m_nC;
1431 for (int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
1434 btSolveL1(m_L, m_Dell, m_nC, m_nskip);
1436 const int nC = m_nC;
1437 btScalar *const Ltgt = m_L + nC * m_nskip;
1438 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1439 for (int j = 0; j < nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
1441 const int nC = m_nC;
1442 m_d[nC] = btRecip(BTAROW(i)[i] - btLargeDot(m_ell, m_Dell, nC));
1446 m_d[0] = btRecip(BTAROW(i)[i]);
1449 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, m_findex, m_n, m_nC, i, m_nskip, 1);
1451 const int nC = m_nC;
1454 m_nC = nC + 1; // nC value is outdated after this line
1458 // if we just finish here then we'll go back and re-solve for
1459 // delta_x. but actually we can be more efficient and incrementally
1460 // update delta_x here. but if we do this, we wont have ell and Dell
1461 // to use in updating the factorization later.
1464 void btRemoveRowCol(btScalar *A, int n, int nskip, int r)
1466 btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
1467 if (r >= n - 1) return;
1471 const size_t move_size = (n - r - 1) * sizeof(btScalar);
1472 btScalar *Adst = A + r;
1473 for (int i = 0; i < r; Adst += nskip, ++i)
1475 btScalar *Asrc = Adst + 1;
1476 memmove(Adst, Asrc, move_size);
1480 const size_t cpy_size = r * sizeof(btScalar);
1481 btScalar *Adst = A + r * nskip;
1482 for (int i = r; i < (n - 1); ++i)
1484 btScalar *Asrc = Adst + nskip;
1485 memcpy(Adst, Asrc, cpy_size);
1491 const size_t cpy_size = (n - r - 1) * sizeof(btScalar);
1492 btScalar *Adst = A + r * (nskip + 1);
1493 for (int i = r; i < (n - 1); ++i)
1495 btScalar *Asrc = Adst + (nskip + 1);
1496 memcpy(Adst, Asrc, cpy_size);
1502 void btLDLTAddTL(btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar> &scratch)
1504 btAssert(L && d && a && n > 0 && nskip >= n);
1507 scratch.resize(2 * nskip);
1508 btScalar *W1 = &scratch[0];
1510 btScalar *W2 = W1 + nskip;
1512 W1[0] = btScalar(0.0);
1513 W2[0] = btScalar(0.0);
1514 for (int j = 1; j < n; ++j)
1516 W1[j] = W2[j] = (btScalar)(a[j] * SIMDSQRT12);
1518 btScalar W11 = (btScalar)((btScalar(0.5) * a[0] + 1) * SIMDSQRT12);
1519 btScalar W21 = (btScalar)((btScalar(0.5) * a[0] - 1) * SIMDSQRT12);
1521 btScalar alpha1 = btScalar(1.0);
1522 btScalar alpha2 = btScalar(1.0);
1525 btScalar dee = d[0];
1526 btScalar alphanew = alpha1 + (W11 * W11) * dee;
1527 btAssert(alphanew != btScalar(0.0));
1529 btScalar gamma1 = W11 * dee;
1532 alphanew = alpha2 - (W21 * W21) * dee;
1534 //btScalar gamma2 = W21 * dee;
1536 btScalar k1 = btScalar(1.0) - W21 * gamma1;
1537 btScalar k2 = W21 * gamma1 * W11 - W21;
1538 btScalar *ll = L + nskip;
1539 for (int p = 1; p < n; ll += nskip, ++p)
1541 btScalar Wp = W1[p];
1543 W1[p] = Wp - W11 * ell;
1544 W2[p] = k1 * Wp + k2 * ell;
1548 btScalar *ll = L + (nskip + 1);
1549 for (int j = 1; j < n; ll += nskip + 1, ++j)
1551 btScalar k1 = W1[j];
1552 btScalar k2 = W2[j];
1554 btScalar dee = d[j];
1555 btScalar alphanew = alpha1 + (k1 * k1) * dee;
1556 btAssert(alphanew != btScalar(0.0));
1558 btScalar gamma1 = k1 * dee;
1561 alphanew = alpha2 - (k2 * k2) * dee;
1563 btScalar gamma2 = k2 * dee;
1568 btScalar *l = ll + nskip;
1569 for (int p = j + 1; p < n; l += nskip, ++p)
1572 btScalar Wp = W1[p] - k1 * ell;
1575 Wp = W2[p] - k2 * ell;
1583 #define _BTGETA(i, j) (A[i][j])
1584 //#define _GETA(i,j) (A[(i)*nskip+(j)])
1585 #define BTGETA(i, j) ((i > j) ? _BTGETA(i, j) : _BTGETA(j, i))
1587 inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
1589 return nskip * 2 * sizeof(btScalar);
1592 void btLDLTRemove(btScalar **A, const int *p, btScalar *L, btScalar *d,
1593 int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar> &scratch)
1595 btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
1596 n1 >= n2 && nskip >= n1);
1598 for (int i = 0; i < n2; ++i)
1599 btAssert(p[i] >= 0 && p[i] < n1);
1604 return; // deleting last row/col is easy
1608 size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip);
1609 btAssert(LDLTAddTL_size % sizeof(btScalar) == 0);
1610 scratch.resize(nskip * 2 + n2);
1611 btScalar *tmp = &scratch[0];
1614 btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size);
1615 const int p_0 = p[0];
1616 for (int i = 0; i < n2; ++i)
1618 a[i] = -BTGETA(p[i], p_0);
1620 a[0] += btScalar(1.0);
1621 btLDLTAddTL(L, d, a, n2, nskip, scratch);
1625 btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size);
1627 btScalar *Lcurr = L + r * nskip;
1628 for (int i = 0; i < r; ++Lcurr, ++i)
1630 btAssert(d[i] != btScalar(0.0));
1631 t[i] = *Lcurr / d[i];
1634 btScalar *a = t + r;
1636 btScalar *Lcurr = L + r * nskip;
1637 const int *pp_r = p + r, p_r = *pp_r;
1638 const int n2_minus_r = n2 - r;
1639 for (int i = 0; i < n2_minus_r; Lcurr += nskip, ++i)
1641 a[i] = btLargeDot(Lcurr, t, r) - BTGETA(pp_r[i], p_r);
1644 a[0] += btScalar(1.0);
1645 btLDLTAddTL(L + r * nskip + r, d + r, a, n2 - r, nskip, scratch);
1649 // snip out row/column r from L and d
1650 btRemoveRowCol(L, n2, nskip, r);
1651 if (r < (n2 - 1)) memmove(d + r, d + r + 1, (n2 - r - 1) * sizeof(btScalar));
1654 void btLCP::transfer_i_from_C_to_N(int i, btAlignedObjectArray<btScalar> &scratch)
1658 // remove a row/column from the factorization, and adjust the
1659 // indexes (black magic!)
1661 const int nC = m_nC;
1671 btLDLTRemove(m_A, C, m_L, m_d, m_n, nC, j, m_nskip, scratch);
1675 for (k = j + 1; k < nC; ++k)
1689 if (j < (nC - 1)) memmove(C + j, C + j + 1, (nC - j - 1) * sizeof(int));
1695 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, m_findex, m_n, i, nC - 1, m_nskip, 1);
1698 m_nC = nC - 1; // nC value is outdated after this line
1702 void btLCP::pN_equals_ANC_times_qC(btScalar *p, btScalar *q)
1704 // we could try to make this matrix-vector multiplication faster using
1705 // outer product matrix tricks, e.g. with the dMultidotX() functions.
1706 // but i tried it and it actually made things slower on random 100x100
1707 // problems because of the overhead involved. so we'll stick with the
1708 // simple method for now.
1709 const int nC = m_nC;
1710 btScalar *ptgt = p + nC;
1711 const int nN = m_nN;
1712 for (int i = 0; i < nN; ++i)
1714 ptgt[i] = btLargeDot(BTAROW(i + nC), q, nC);
1718 void btLCP::pN_plusequals_ANi(btScalar *p, int i, int sign)
1720 const int nC = m_nC;
1721 btScalar *aptr = BTAROW(i) + nC;
1722 btScalar *ptgt = p + nC;
1725 const int nN = m_nN;
1726 for (int j = 0; j < nN; ++j) ptgt[j] += aptr[j];
1730 const int nN = m_nN;
1731 for (int j = 0; j < nN; ++j) ptgt[j] -= aptr[j];
1735 void btLCP::pC_plusequals_s_times_qC(btScalar *p, btScalar s, btScalar *q)
1737 const int nC = m_nC;
1738 for (int i = 0; i < nC; ++i)
1744 void btLCP::pN_plusequals_s_times_qN(btScalar *p, btScalar s, btScalar *q)
1746 const int nC = m_nC;
1747 btScalar *ptgt = p + nC, *qsrc = q + nC;
1748 const int nN = m_nN;
1749 for (int i = 0; i < nN; ++i)
1751 ptgt[i] += s * qsrc[i];
1755 void btLCP::solve1(btScalar *a, int i, int dir, int only_transfer)
1757 // the `Dell' and `ell' that are computed here are saved. if index i is
1758 // later added to the factorization then they can be reused.
1760 // @@@ question: do we need to solve for entire delta_x??? yes, but
1761 // only if an x goes below 0 during the step.
1766 btScalar *Dell = m_Dell;
1768 btScalar *aptr = BTAROW(i);
1769 #ifdef BTNUB_OPTIMIZATIONS
1770 // if nub>0, initial part of aptr[] is guaranteed unpermuted
1771 const int nub = m_nub;
1773 for (; j < nub; ++j) Dell[j] = aptr[j];
1774 const int nC = m_nC;
1775 for (; j < nC; ++j) Dell[j] = aptr[C[j]];
1777 const int nC = m_nC;
1778 for (int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
1781 btSolveL1(m_L, m_Dell, m_nC, m_nskip);
1783 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1784 const int nC = m_nC;
1785 for (int j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
1790 btScalar *tmp = m_tmp, *ell = m_ell;
1792 const int nC = m_nC;
1793 for (int j = 0; j < nC; ++j) tmp[j] = ell[j];
1795 btSolveL1T(m_L, tmp, m_nC, m_nskip);
1799 btScalar *tmp = m_tmp;
1800 const int nC = m_nC;
1801 for (int j = 0; j < nC; ++j) a[C[j]] = -tmp[j];
1806 btScalar *tmp = m_tmp;
1807 const int nC = m_nC;
1808 for (int j = 0; j < nC; ++j) a[C[j]] = tmp[j];
1814 void btLCP::unpermute()
1816 // now we have to un-permute x and w
1818 memcpy(m_tmp, m_x, m_n * sizeof(btScalar));
1819 btScalar *x = m_x, *tmp = m_tmp;
1822 for (int j = 0; j < n; ++j) x[p[j]] = tmp[j];
1825 memcpy(m_tmp, m_w, m_n * sizeof(btScalar));
1826 btScalar *w = m_w, *tmp = m_tmp;
1829 for (int j = 0; j < n; ++j) w[p[j]] = tmp[j];
1833 #endif // btLCP_FAST
1835 //***************************************************************************
1836 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1838 bool btSolveDantzigLCP(int n, btScalar *A, btScalar *x, btScalar *b,
1839 btScalar *outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory &scratchMem)
1843 // printf("btSolveDantzigLCP n=%d\n",n);
1844 btAssert(n > 0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
1849 // check restrictions on lo and hi
1850 for (int k = 0; k < n; ++k)
1851 btAssert(lo[k] <= 0 && hi[k] >= 0);
1855 // if all the variables are unbounded then we can just factor, solve,
1860 btFactorLDLT(A, outer_w, n, nskip);
1861 btSolveLDLT(A, outer_w, b, n, nskip);
1862 memcpy(x, b, n * sizeof(btScalar));
1867 const int nskip = (n);
1868 scratchMem.L.resize(n * nskip);
1870 scratchMem.d.resize(n);
1872 btScalar *w = outer_w;
1873 scratchMem.delta_w.resize(n);
1874 scratchMem.delta_x.resize(n);
1875 scratchMem.Dell.resize(n);
1876 scratchMem.ell.resize(n);
1877 scratchMem.Arows.resize(n);
1878 scratchMem.p.resize(n);
1879 scratchMem.C.resize(n);
1881 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1882 scratchMem.state.resize(n);
1884 // create LCP object. note that tmp is set to delta_w to save space, this
1885 // optimization relies on knowledge of how tmp is used, so be careful!
1886 btLCP lcp(n, nskip, nub, A, x, b, w, lo, hi, &scratchMem.L[0], &scratchMem.d[0], &scratchMem.Dell[0], &scratchMem.ell[0], &scratchMem.delta_w[0], &scratchMem.state[0], findex, &scratchMem.p[0], &scratchMem.C[0], &scratchMem.Arows[0]);
1887 int adj_nub = lcp.getNub();
1889 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
1890 // LCP conditions then i is added to the appropriate index set. otherwise
1891 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1892 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1893 // while driving x(i) we maintain the LCP conditions on the other variables
1894 // 0..i-1. we do this by watching out for other x(i),w(i) values going
1895 // outside the valid region, and then switching them between index sets
1896 // when that happens.
1898 bool hit_first_friction_index = false;
1899 for (int i = adj_nub; i < n; ++i)
1902 // the index i is the driving index and indexes i+1..n-1 are "dont care",
1903 // i.e. when we make changes to the system those x's will be zero and we
1904 // don't care what happens to those w's. in other words, we only consider
1905 // an (i+1)*(i+1) sub-problem of A*x=b+w.
1907 // if we've hit the first friction index, we have to compute the lo and
1908 // hi values based on the values of x already computed. we have been
1909 // permuting the indexes, so the values stored in the findex vector are
1910 // no longer valid. thus we have to temporarily unpermute the x vector.
1911 // for the purposes of this computation, 0*infinity = 0 ... so if the
1912 // contact constraint's normal force is 0, there should be no tangential
1915 if (!hit_first_friction_index && findex && findex[i] >= 0)
1917 // un-permute x into delta_w, which is not being used at the moment
1918 for (int j = 0; j < n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j];
1920 // set lo and hi values
1921 for (int k = i; k < n; ++k)
1923 btScalar wfk = scratchMem.delta_w[findex[k]];
1931 hi[k] = btFabs(hi[k] * wfk);
1935 hit_first_friction_index = true;
1938 // thus far we have not even been computing the w values for indexes
1939 // greater than i, so compute w[i] now.
1940 w[i] = lcp.AiC_times_qC(i, x) + lcp.AiN_times_qN(i, x) - b[i];
1942 // if lo=hi=0 (which can happen for tangential friction when normals are
1943 // 0) then the index will be assigned to set N with some state. however,
1944 // set C's line has zero size, so the index will always remain in set N.
1945 // with the "normal" switching logic, if w changed sign then the index
1946 // would have to switch to set C and then back to set N with an inverted
1947 // state. this is pointless, and also computationally expensive. to
1948 // prevent this from happening, we use the rule that indexes with lo=hi=0
1949 // will never be checked for set changes. this means that the state for
1950 // these indexes may be incorrect, but that doesn't matter.
1952 // see if x(i),w(i) is in a valid region
1953 if (lo[i] == 0 && w[i] >= 0)
1955 lcp.transfer_i_to_N(i);
1956 scratchMem.state[i] = false;
1958 else if (hi[i] == 0 && w[i] <= 0)
1960 lcp.transfer_i_to_N(i);
1961 scratchMem.state[i] = true;
1965 // this is a degenerate case. by the time we get to this test we know
1966 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1967 // and similarly that hi > 0. this means that the line segment
1968 // corresponding to set C is at least finite in extent, and we are on it.
1969 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
1970 lcp.solve1(&scratchMem.delta_x[0], i, 0, 1);
1972 lcp.transfer_i_to_C(i);
1976 // we must push x(i) and w(i)
1981 // find direction to push on x(i)
1985 dirf = btScalar(1.0);
1990 dirf = btScalar(-1.0);
1993 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1994 lcp.solve1(&scratchMem.delta_x[0], i, dir);
1996 // note that delta_x[i] = dirf, but we wont bother to set it
1998 // compute: delta_w = A*delta_x ... note we only care about
1999 // delta_w(N) and delta_w(i), the rest is ignored
2000 lcp.pN_equals_ANC_times_qC(&scratchMem.delta_w[0], &scratchMem.delta_x[0]);
2001 lcp.pN_plusequals_ANi(&scratchMem.delta_w[0], i, dir);
2002 scratchMem.delta_w[i] = lcp.AiC_times_qC(i, &scratchMem.delta_x[0]) + lcp.Aii(i) * dirf;
2004 // find largest step we can take (size=s), either to drive x(i),w(i)
2005 // to the valid LCP region or to drive an already-valid variable
2006 // outside the valid region.
2008 int cmd = 1; // index switching command
2009 int si = 0; // si = index to switch if cmd>3
2010 btScalar s = -w[i] / scratchMem.delta_w[i];
2013 if (hi[i] < BT_INFINITY)
2015 btScalar s2 = (hi[i] - x[i]) * dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
2025 if (lo[i] > -BT_INFINITY)
2027 btScalar s2 = (lo[i] - x[i]) * dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
2037 const int numN = lcp.numN();
2038 for (int k = 0; k < numN; ++k)
2040 const int indexN_k = lcp.indexN(k);
2041 if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0)
2043 // don't bother checking if lo=hi=0
2044 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue;
2045 btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k];
2057 const int numC = lcp.numC();
2058 for (int k = adj_nub; k < numC; ++k)
2060 const int indexC_k = lcp.indexC(k);
2061 if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY)
2063 btScalar s2 = (lo[indexC_k] - x[indexC_k]) / scratchMem.delta_x[indexC_k];
2071 if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY)
2073 btScalar s2 = (hi[indexC_k] - x[indexC_k]) / scratchMem.delta_x[indexC_k];
2084 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
2085 // "C->NL","C->NH"};
2086 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
2088 // if s <= 0 then we've got a problem. if we just keep going then
2089 // we're going to get stuck in an infinite loop. instead, just cross
2090 // our fingers and exit with the current solution.
2091 if (s <= btScalar(0.0))
2093 // printf("LCP internal error, s <= 0 (s=%.4e)",(double)s);
2096 btSetZero(x + i, n - i);
2097 btSetZero(w + i, n - i);
2103 // apply x = x + s * delta_x
2104 lcp.pC_plusequals_s_times_qC(x, s, &scratchMem.delta_x[0]);
2107 // apply w = w + s * delta_w
2108 lcp.pN_plusequals_s_times_qN(w, s, &scratchMem.delta_w[0]);
2109 w[i] += s * scratchMem.delta_w[i];
2112 // switch indexes between sets if necessary
2117 lcp.transfer_i_to_C(i);
2121 scratchMem.state[i] = false;
2122 lcp.transfer_i_to_N(i);
2126 scratchMem.state[i] = true;
2127 lcp.transfer_i_to_N(i);
2129 case 4: // keep going
2131 lcp.transfer_i_from_N_to_C(si);
2133 case 5: // keep going
2135 scratchMem.state[si] = false;
2136 lcp.transfer_i_from_C_to_N(si, scratchMem.m_scratch);
2138 case 6: // keep going
2140 scratchMem.state[si] = true;
2141 lcp.transfer_i_from_C_to_N(si, scratchMem.m_scratch);
2145 if (cmd <= 3) break;
2153 } // for (int i=adj_nub; i<n; ++i)