3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download DBBCSD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbbcsd.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbbcsd.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbbcsd.f">
21 * SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
22 * THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
23 * V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
24 * B22D, B22E, WORK, LWORK, INFO )
26 * .. Scalar Arguments ..
27 * CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
28 * INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q
30 * .. Array Arguments ..
31 * DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ),
32 * $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
33 * $ PHI( * ), THETA( * ), WORK( * )
34 * DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
44 *> DBBCSD computes the CS decomposition of an orthogonal matrix in
45 *> bidiagonal-block form,
50 *> X = [----------------]
55 *> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T
56 *> = [---------] [---------------] [---------] .
57 *> [ | U2 ] [ S | C 0 0 ] [ | V2 ]
60 *> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
61 *> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
62 *> transposed and/or permuted. This can be done in constant time using
63 *> the TRANS and SIGNS options. See DORCSD for details.)
65 *> The bidiagonal matrices B11, B12, B21, and B22 are represented
66 *> implicitly by angles THETA(1:Q) and PHI(1:Q-1).
68 *> The orthogonal matrices U1, U2, V1T, and V2T are input/output.
69 *> The input matrices are pre- or post-multiplied by the appropriate
70 *> singular vector matrices.
79 *> = 'Y': U1 is updated;
80 *> otherwise: U1 is not updated.
86 *> = 'Y': U2 is updated;
87 *> otherwise: U2 is not updated.
92 *> JOBV1T is CHARACTER
93 *> = 'Y': V1T is updated;
94 *> otherwise: V1T is not updated.
99 *> JOBV2T is CHARACTER
100 *> = 'Y': V2T is updated;
101 *> otherwise: V2T is not updated.
106 *> TRANS is CHARACTER
107 *> = 'T': X, U1, U2, V1T, and V2T are stored in row-major
109 *> otherwise: X, U1, U2, V1T, and V2T are stored in column-
116 *> The number of rows and columns in X, the orthogonal matrix in
117 *> bidiagonal-block form.
123 *> The number of rows in the top-left block of X. 0 <= P <= M.
129 *> The number of columns in the top-left block of X.
130 *> 0 <= Q <= MIN(P,M-P,M-Q).
133 *> \param[in,out] THETA
135 *> THETA is DOUBLE PRECISION array, dimension (Q)
136 *> On entry, the angles THETA(1),...,THETA(Q) that, along with
137 *> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
138 *> form. On exit, the angles whose cosines and sines define the
139 *> diagonal blocks in the CS decomposition.
142 *> \param[in,out] PHI
144 *> PHI is DOUBLE PRECISION array, dimension (Q-1)
145 *> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
146 *> THETA(Q), define the matrix in bidiagonal-block form.
151 *> U1 is DOUBLE PRECISION array, dimension (LDU1,P)
152 *> On entry, a P-by-P matrix. On exit, U1 is postmultiplied
153 *> by the left singular vector matrix common to [ B11 ; 0 ] and
154 *> [ B12 0 0 ; 0 -I 0 0 ].
160 *> The leading dimension of the array U1, LDU1 >= MAX(1,P).
165 *> U2 is DOUBLE PRECISION array, dimension (LDU2,M-P)
166 *> On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is
167 *> postmultiplied by the left singular vector matrix common to
168 *> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
174 *> The leading dimension of the array U2, LDU2 >= MAX(1,M-P).
177 *> \param[in,out] V1T
179 *> V1T is DOUBLE PRECISION array, dimension (LDV1T,Q)
180 *> On entry, a Q-by-Q matrix. On exit, V1T is premultiplied
181 *> by the transpose of the right singular vector
182 *> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
188 *> The leading dimension of the array V1T, LDV1T >= MAX(1,Q).
191 *> \param[in,out] V2T
193 *> V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q)
194 *> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is
195 *> premultiplied by the transpose of the right
196 *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
197 *> [ B22 0 0 ; 0 0 I ].
203 *> The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q).
208 *> B11D is DOUBLE PRECISION array, dimension (Q)
209 *> When DBBCSD converges, B11D contains the cosines of THETA(1),
210 *> ..., THETA(Q). If DBBCSD fails to converge, then B11D
211 *> contains the diagonal of the partially reduced top-left
217 *> B11E is DOUBLE PRECISION array, dimension (Q-1)
218 *> When DBBCSD converges, B11E contains zeros. If DBBCSD fails
219 *> to converge, then B11E contains the superdiagonal of the
220 *> partially reduced top-left block.
225 *> B12D is DOUBLE PRECISION array, dimension (Q)
226 *> When DBBCSD converges, B12D contains the negative sines of
227 *> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then
228 *> B12D contains the diagonal of the partially reduced top-right
234 *> B12E is DOUBLE PRECISION array, dimension (Q-1)
235 *> When DBBCSD converges, B12E contains zeros. If DBBCSD fails
236 *> to converge, then B12E contains the subdiagonal of the
237 *> partially reduced top-right block.
242 *> B21D is DOUBLE PRECISION array, dimension (Q)
243 *> When DBBCSD converges, B21D contains the negative sines of
244 *> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then
245 *> B21D contains the diagonal of the partially reduced bottom-left
251 *> B21E is DOUBLE PRECISION array, dimension (Q-1)
252 *> When DBBCSD converges, B21E contains zeros. If DBBCSD fails
253 *> to converge, then B21E contains the superdiagonal of the
254 *> partially reduced bottom-left block.
259 *> B22D is DOUBLE PRECISION array, dimension (Q)
260 *> When DBBCSD converges, B22D contains the cosines of
261 *> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then
262 *> B22D contains the diagonal of the partially reduced bottom-right
268 *> B22E is DOUBLE PRECISION array, dimension (Q-1)
269 *> When DBBCSD converges, B22E contains zeros. If DBBCSD fails
270 *> to converge, then B22E contains the subdiagonal of the
271 *> partially reduced bottom-right block.
276 *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
277 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
283 *> The dimension of the array WORK. LWORK >= MAX(1,8*Q).
285 *> If LWORK = -1, then a workspace query is assumed; the
286 *> routine only calculates the optimal size of the WORK array,
287 *> returns this value as the first entry of the work array, and
288 *> no error message related to LWORK is issued by XERBLA.
294 *> = 0: successful exit.
295 *> < 0: if INFO = -i, the i-th argument had an illegal value.
296 *> > 0: if DBBCSD did not converge, INFO specifies the number
297 *> of nonzero entries in PHI, and B11D, B11E, etc.,
298 *> contain the partially reduced matrix.
301 *> \par Internal Parameters:
302 * =========================
305 *> TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
306 *> TOLMUL controls the convergence criterion of the QR loop.
307 *> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
308 *> are within TOLMUL*EPS of either bound.
314 *> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
315 *> Algorithms, 50(1):33-65, 2009.
320 *> \author Univ. of Tennessee
321 *> \author Univ. of California Berkeley
322 *> \author Univ. of Colorado Denver
327 *> \ingroup doubleOTHERcomputational
329 * =====================================================================
330 SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
331 $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
332 $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
333 $ B22D, B22E, WORK, LWORK, INFO )
335 * -- LAPACK computational routine (version 3.6.1) --
336 * -- LAPACK is a software package provided by Univ. of Tennessee, --
337 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
340 * .. Scalar Arguments ..
341 CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
342 INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q
344 * .. Array Arguments ..
345 DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ),
346 $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
347 $ PHI( * ), THETA( * ), WORK( * )
348 DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
352 * ===================================================================
356 PARAMETER ( MAXITR = 6 )
357 DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
358 PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
359 $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
360 $ TEN = 10.0D0, ZERO = 0.0D0 )
361 DOUBLE PRECISION NEGONE
362 PARAMETER ( NEGONE = -1.0D0 )
364 * .. Local Scalars ..
365 LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12,
366 $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
368 INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
369 $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
370 $ LWORKMIN, LWORKOPT, MAXIT, MINI
371 DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
372 $ EPS, MU, NU, R, SIGMA11, SIGMA21,
373 $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
374 $ UNFL, X1, X2, Y1, Y2
376 * .. External Subroutines ..
377 EXTERNAL DLASR, DSCAL, DSWAP, DLARTGP, DLARTGS, DLAS2,
380 * .. External Functions ..
381 DOUBLE PRECISION DLAMCH
383 EXTERNAL LSAME, DLAMCH
385 * .. Intrinsic Functions ..
386 INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT
388 * .. Executable Statements ..
390 * Test input arguments
393 LQUERY = LWORK .EQ. -1
394 WANTU1 = LSAME( JOBU1, 'Y' )
395 WANTU2 = LSAME( JOBU2, 'Y' )
396 WANTV1T = LSAME( JOBV1T, 'Y' )
397 WANTV2T = LSAME( JOBV2T, 'Y' )
398 COLMAJOR = .NOT. LSAME( TRANS, 'T' )
402 ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
404 ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
406 ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN
408 ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
410 ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
412 ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
414 ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
418 * Quick return if Q = 0
420 IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
428 IF( INFO .EQ. 0 ) THEN
437 LWORKOPT = IV2TSN + Q - 1
440 IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN
445 IF( INFO .NE. 0 ) THEN
446 CALL XERBLA( 'DBBCSD', -INFO )
448 ELSE IF( LQUERY ) THEN
452 * Get machine constants
454 EPS = DLAMCH( 'Epsilon' )
455 UNFL = DLAMCH( 'Safe minimum' )
456 TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
458 THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
460 * Test for negligible sines or cosines
463 IF( THETA(I) .LT. THRESH ) THEN
465 ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
470 IF( PHI(I) .LT. THRESH ) THEN
472 ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
480 DO WHILE( IMAX .GT. 1 )
481 IF( PHI(IMAX-1) .NE. ZERO ) THEN
487 IF ( IMIN .GT. 1 ) THEN
488 DO WHILE( PHI(IMIN-1) .NE. ZERO )
490 IF ( IMIN .LE. 1 ) EXIT
494 * Initialize iteration counter
499 * Begin main iteration loop
501 DO WHILE( IMAX .GT. 1 )
503 * Compute the matrix entries
505 B11D(IMIN) = COS( THETA(IMIN) )
506 B21D(IMIN) = -SIN( THETA(IMIN) )
507 DO I = IMIN, IMAX - 1
508 B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
509 B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) )
510 B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
511 B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
512 B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
513 B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) )
514 B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
515 B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
517 B12D(IMAX) = SIN( THETA(IMAX) )
518 B22D(IMAX) = COS( THETA(IMAX) )
520 * Abort if not converging; otherwise, increment ITER
522 IF( ITER .GT. MAXIT ) THEN
525 IF( PHI(I) .NE. ZERO )
531 ITER = ITER + IMAX - IMIN
535 THETAMAX = THETA(IMIN)
536 THETAMIN = THETA(IMIN)
538 IF( THETA(I) > THETAMAX )
539 $ THETAMAX = THETA(I)
540 IF( THETA(I) < THETAMIN )
541 $ THETAMIN = THETA(I)
544 IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
546 * Zero on diagonals of B11 and B22; induce deflation with a
552 ELSE IF( THETAMIN .LT. THRESH ) THEN
554 * Zero on diagonals of B12 and B21; induce deflation with a
562 * Compute shifts for B11 and B21 and use the lesser
564 CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11,
566 CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21,
569 IF( SIGMA11 .LE. SIGMA21 ) THEN
571 NU = SQRT( ONE - MU**2 )
572 IF( MU .LT. THRESH ) THEN
578 MU = SQRT( 1.0 - NU**2 )
579 IF( NU .LT. THRESH ) THEN
586 * Rotate to produce bulges in B11 and B21
588 IF( MU .LE. NU ) THEN
589 CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
590 $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
592 CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
593 $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
596 TEMP = WORK(IV1TCS+IMIN-1)*B11D(IMIN) +
597 $ WORK(IV1TSN+IMIN-1)*B11E(IMIN)
598 B11E(IMIN) = WORK(IV1TCS+IMIN-1)*B11E(IMIN) -
599 $ WORK(IV1TSN+IMIN-1)*B11D(IMIN)
601 B11BULGE = WORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
602 B11D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
603 TEMP = WORK(IV1TCS+IMIN-1)*B21D(IMIN) +
604 $ WORK(IV1TSN+IMIN-1)*B21E(IMIN)
605 B21E(IMIN) = WORK(IV1TCS+IMIN-1)*B21E(IMIN) -
606 $ WORK(IV1TSN+IMIN-1)*B21D(IMIN)
608 B21BULGE = WORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
609 B21D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
611 * Compute THETA(IMIN)
613 THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ),
614 $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
616 * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
618 IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
619 CALL DLARTGP( B11BULGE, B11D(IMIN), WORK(IU1SN+IMIN-1),
620 $ WORK(IU1CS+IMIN-1), R )
621 ELSE IF( MU .LE. NU ) THEN
622 CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
623 $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
625 CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
626 $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
628 IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
629 CALL DLARTGP( B21BULGE, B21D(IMIN), WORK(IU2SN+IMIN-1),
630 $ WORK(IU2CS+IMIN-1), R )
631 ELSE IF( NU .LT. MU ) THEN
632 CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
633 $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
635 CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
636 $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
638 WORK(IU2CS+IMIN-1) = -WORK(IU2CS+IMIN-1)
639 WORK(IU2SN+IMIN-1) = -WORK(IU2SN+IMIN-1)
641 TEMP = WORK(IU1CS+IMIN-1)*B11E(IMIN) +
642 $ WORK(IU1SN+IMIN-1)*B11D(IMIN+1)
643 B11D(IMIN+1) = WORK(IU1CS+IMIN-1)*B11D(IMIN+1) -
644 $ WORK(IU1SN+IMIN-1)*B11E(IMIN)
646 IF( IMAX .GT. IMIN+1 ) THEN
647 B11BULGE = WORK(IU1SN+IMIN-1)*B11E(IMIN+1)
648 B11E(IMIN+1) = WORK(IU1CS+IMIN-1)*B11E(IMIN+1)
650 TEMP = WORK(IU1CS+IMIN-1)*B12D(IMIN) +
651 $ WORK(IU1SN+IMIN-1)*B12E(IMIN)
652 B12E(IMIN) = WORK(IU1CS+IMIN-1)*B12E(IMIN) -
653 $ WORK(IU1SN+IMIN-1)*B12D(IMIN)
655 B12BULGE = WORK(IU1SN+IMIN-1)*B12D(IMIN+1)
656 B12D(IMIN+1) = WORK(IU1CS+IMIN-1)*B12D(IMIN+1)
657 TEMP = WORK(IU2CS+IMIN-1)*B21E(IMIN) +
658 $ WORK(IU2SN+IMIN-1)*B21D(IMIN+1)
659 B21D(IMIN+1) = WORK(IU2CS+IMIN-1)*B21D(IMIN+1) -
660 $ WORK(IU2SN+IMIN-1)*B21E(IMIN)
662 IF( IMAX .GT. IMIN+1 ) THEN
663 B21BULGE = WORK(IU2SN+IMIN-1)*B21E(IMIN+1)
664 B21E(IMIN+1) = WORK(IU2CS+IMIN-1)*B21E(IMIN+1)
666 TEMP = WORK(IU2CS+IMIN-1)*B22D(IMIN) +
667 $ WORK(IU2SN+IMIN-1)*B22E(IMIN)
668 B22E(IMIN) = WORK(IU2CS+IMIN-1)*B22E(IMIN) -
669 $ WORK(IU2SN+IMIN-1)*B22D(IMIN)
671 B22BULGE = WORK(IU2SN+IMIN-1)*B22D(IMIN+1)
672 B22D(IMIN+1) = WORK(IU2CS+IMIN-1)*B22D(IMIN+1)
674 * Inner loop: chase bulges from B11(IMIN,IMIN+2),
675 * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
678 DO I = IMIN+1, IMAX-1
682 X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1)
683 X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
684 Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1)
685 Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
687 PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
689 * Determine if there are bulges to chase or if a new direct
690 * summand has been reached
692 RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
693 RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
694 RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
695 RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
697 * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
698 * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
699 * chasing by applying the original shift again.
701 IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
702 CALL DLARTGP( X2, X1, WORK(IV1TSN+I-1), WORK(IV1TCS+I-1),
704 ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN
705 CALL DLARTGP( B11BULGE, B11E(I-1), WORK(IV1TSN+I-1),
706 $ WORK(IV1TCS+I-1), R )
707 ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
708 CALL DLARTGP( B21BULGE, B21E(I-1), WORK(IV1TSN+I-1),
709 $ WORK(IV1TCS+I-1), R )
710 ELSE IF( MU .LE. NU ) THEN
711 CALL DLARTGS( B11D(I), B11E(I), MU, WORK(IV1TCS+I-1),
714 CALL DLARTGS( B21D(I), B21E(I), NU, WORK(IV1TCS+I-1),
717 WORK(IV1TCS+I-1) = -WORK(IV1TCS+I-1)
718 WORK(IV1TSN+I-1) = -WORK(IV1TSN+I-1)
719 IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
720 CALL DLARTGP( Y2, Y1, WORK(IV2TSN+I-1-1),
721 $ WORK(IV2TCS+I-1-1), R )
722 ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
723 CALL DLARTGP( B12BULGE, B12D(I-1), WORK(IV2TSN+I-1-1),
724 $ WORK(IV2TCS+I-1-1), R )
725 ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
726 CALL DLARTGP( B22BULGE, B22D(I-1), WORK(IV2TSN+I-1-1),
727 $ WORK(IV2TCS+I-1-1), R )
728 ELSE IF( NU .LT. MU ) THEN
729 CALL DLARTGS( B12E(I-1), B12D(I), NU, WORK(IV2TCS+I-1-1),
730 $ WORK(IV2TSN+I-1-1) )
732 CALL DLARTGS( B22E(I-1), B22D(I), MU, WORK(IV2TCS+I-1-1),
733 $ WORK(IV2TSN+I-1-1) )
736 TEMP = WORK(IV1TCS+I-1)*B11D(I) + WORK(IV1TSN+I-1)*B11E(I)
737 B11E(I) = WORK(IV1TCS+I-1)*B11E(I) -
738 $ WORK(IV1TSN+I-1)*B11D(I)
740 B11BULGE = WORK(IV1TSN+I-1)*B11D(I+1)
741 B11D(I+1) = WORK(IV1TCS+I-1)*B11D(I+1)
742 TEMP = WORK(IV1TCS+I-1)*B21D(I) + WORK(IV1TSN+I-1)*B21E(I)
743 B21E(I) = WORK(IV1TCS+I-1)*B21E(I) -
744 $ WORK(IV1TSN+I-1)*B21D(I)
746 B21BULGE = WORK(IV1TSN+I-1)*B21D(I+1)
747 B21D(I+1) = WORK(IV1TCS+I-1)*B21D(I+1)
748 TEMP = WORK(IV2TCS+I-1-1)*B12E(I-1) +
749 $ WORK(IV2TSN+I-1-1)*B12D(I)
750 B12D(I) = WORK(IV2TCS+I-1-1)*B12D(I) -
751 $ WORK(IV2TSN+I-1-1)*B12E(I-1)
753 B12BULGE = WORK(IV2TSN+I-1-1)*B12E(I)
754 B12E(I) = WORK(IV2TCS+I-1-1)*B12E(I)
755 TEMP = WORK(IV2TCS+I-1-1)*B22E(I-1) +
756 $ WORK(IV2TSN+I-1-1)*B22D(I)
757 B22D(I) = WORK(IV2TCS+I-1-1)*B22D(I) -
758 $ WORK(IV2TSN+I-1-1)*B22E(I-1)
760 B22BULGE = WORK(IV2TSN+I-1-1)*B22E(I)
761 B22E(I) = WORK(IV2TCS+I-1-1)*B22E(I)
765 X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
766 X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
767 Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
768 Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
770 THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
772 * Determine if there are bulges to chase or if a new direct
773 * summand has been reached
775 RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
776 RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
777 RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
778 RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
780 * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
781 * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
782 * chasing by applying the original shift again.
784 IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
785 CALL DLARTGP( X2, X1, WORK(IU1SN+I-1), WORK(IU1CS+I-1),
787 ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN
788 CALL DLARTGP( B11BULGE, B11D(I), WORK(IU1SN+I-1),
789 $ WORK(IU1CS+I-1), R )
790 ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
791 CALL DLARTGP( B12BULGE, B12E(I-1), WORK(IU1SN+I-1),
792 $ WORK(IU1CS+I-1), R )
793 ELSE IF( MU .LE. NU ) THEN
794 CALL DLARTGS( B11E(I), B11D(I+1), MU, WORK(IU1CS+I-1),
797 CALL DLARTGS( B12D(I), B12E(I), NU, WORK(IU1CS+I-1),
800 IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
801 CALL DLARTGP( Y2, Y1, WORK(IU2SN+I-1), WORK(IU2CS+I-1),
803 ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN
804 CALL DLARTGP( B21BULGE, B21D(I), WORK(IU2SN+I-1),
805 $ WORK(IU2CS+I-1), R )
806 ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
807 CALL DLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1),
808 $ WORK(IU2CS+I-1), R )
809 ELSE IF( NU .LT. MU ) THEN
810 CALL DLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1),
813 CALL DLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1),
816 WORK(IU2CS+I-1) = -WORK(IU2CS+I-1)
817 WORK(IU2SN+I-1) = -WORK(IU2SN+I-1)
819 TEMP = WORK(IU1CS+I-1)*B11E(I) + WORK(IU1SN+I-1)*B11D(I+1)
820 B11D(I+1) = WORK(IU1CS+I-1)*B11D(I+1) -
821 $ WORK(IU1SN+I-1)*B11E(I)
823 IF( I .LT. IMAX - 1 ) THEN
824 B11BULGE = WORK(IU1SN+I-1)*B11E(I+1)
825 B11E(I+1) = WORK(IU1CS+I-1)*B11E(I+1)
827 TEMP = WORK(IU2CS+I-1)*B21E(I) + WORK(IU2SN+I-1)*B21D(I+1)
828 B21D(I+1) = WORK(IU2CS+I-1)*B21D(I+1) -
829 $ WORK(IU2SN+I-1)*B21E(I)
831 IF( I .LT. IMAX - 1 ) THEN
832 B21BULGE = WORK(IU2SN+I-1)*B21E(I+1)
833 B21E(I+1) = WORK(IU2CS+I-1)*B21E(I+1)
835 TEMP = WORK(IU1CS+I-1)*B12D(I) + WORK(IU1SN+I-1)*B12E(I)
836 B12E(I) = WORK(IU1CS+I-1)*B12E(I) - WORK(IU1SN+I-1)*B12D(I)
838 B12BULGE = WORK(IU1SN+I-1)*B12D(I+1)
839 B12D(I+1) = WORK(IU1CS+I-1)*B12D(I+1)
840 TEMP = WORK(IU2CS+I-1)*B22D(I) + WORK(IU2SN+I-1)*B22E(I)
841 B22E(I) = WORK(IU2CS+I-1)*B22E(I) - WORK(IU2SN+I-1)*B22D(I)
843 B22BULGE = WORK(IU2SN+I-1)*B22D(I+1)
844 B22D(I+1) = WORK(IU2CS+I-1)*B22D(I+1)
848 * Compute PHI(IMAX-1)
850 X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) +
851 $ COS(THETA(IMAX-1))*B21E(IMAX-1)
852 Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) +
853 $ COS(THETA(IMAX-1))*B22D(IMAX-1)
854 Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE
856 PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) )
858 * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
860 RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
861 RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
863 IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
864 CALL DLARTGP( Y2, Y1, WORK(IV2TSN+IMAX-1-1),
865 $ WORK(IV2TCS+IMAX-1-1), R )
866 ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
867 CALL DLARTGP( B12BULGE, B12D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
868 $ WORK(IV2TCS+IMAX-1-1), R )
869 ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
870 CALL DLARTGP( B22BULGE, B22D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
871 $ WORK(IV2TCS+IMAX-1-1), R )
872 ELSE IF( NU .LT. MU ) THEN
873 CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
874 $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
876 CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
877 $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
880 TEMP = WORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) +
881 $ WORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
882 B12D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
883 $ WORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
885 TEMP = WORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) +
886 $ WORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
887 B22D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
888 $ WORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
891 * Update singular vectors
895 CALL DLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
896 $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
899 CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
900 $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
906 CALL DLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
907 $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
910 CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
911 $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
917 CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
918 $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
919 $ V1T(IMIN,1), LDV1T )
921 CALL DLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
922 $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
923 $ V1T(1,IMIN), LDV1T )
928 CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
929 $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
930 $ V2T(IMIN,1), LDV2T )
932 CALL DLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
933 $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
934 $ V2T(1,IMIN), LDV2T )
938 * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
940 IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN
941 B11D(IMAX) = -B11D(IMAX)
942 B21D(IMAX) = -B21D(IMAX)
945 CALL DSCAL( Q, NEGONE, V1T(IMAX,1), LDV1T )
947 CALL DSCAL( Q, NEGONE, V1T(1,IMAX), 1 )
952 * Compute THETA(IMAX)
954 X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
955 $ SIN(PHI(IMAX-1))*B12E(IMAX-1)
956 Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
957 $ SIN(PHI(IMAX-1))*B22E(IMAX-1)
959 THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
961 * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
962 * and B22(IMAX,IMAX-1)
964 IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN
965 B12D(IMAX) = -B12D(IMAX)
968 CALL DSCAL( P, NEGONE, U1(1,IMAX), 1 )
970 CALL DSCAL( P, NEGONE, U1(IMAX,1), LDU1 )
974 IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN
975 B22D(IMAX) = -B22D(IMAX)
978 CALL DSCAL( M-P, NEGONE, U2(1,IMAX), 1 )
980 CALL DSCAL( M-P, NEGONE, U2(IMAX,1), LDU2 )
985 * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
987 IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN
990 CALL DSCAL( M-Q, NEGONE, V2T(IMAX,1), LDV2T )
992 CALL DSCAL( M-Q, NEGONE, V2T(1,IMAX), 1 )
997 * Test for negligible sines or cosines
1000 IF( THETA(I) .LT. THRESH ) THEN
1002 ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
1007 IF( PHI(I) .LT. THRESH ) THEN
1009 ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
1016 IF (IMAX .GT. 1) THEN
1017 DO WHILE( PHI(IMAX-1) .EQ. ZERO )
1019 IF (IMAX .LE. 1) EXIT
1022 IF( IMIN .GT. IMAX - 1 )
1024 IF (IMIN .GT. 1) THEN
1025 DO WHILE (PHI(IMIN-1) .NE. ZERO)
1027 IF (IMIN .LE. 1) EXIT
1031 * Repeat main iteration loop
1035 * Postprocessing: order THETA from least to greatest
1042 IF( THETA(J) .LT. THETAMIN ) THEN
1048 IF( MINI .NE. I ) THEN
1049 THETA(MINI) = THETA(I)
1053 $ CALL DSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
1055 $ CALL DSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
1057 $ CALL DSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
1059 $ CALL DSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
1063 $ CALL DSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
1065 $ CALL DSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
1067 $ CALL DSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
1069 $ CALL DSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )