1 *> \brief \b CLALSD uses the singular value decomposition of A to solve the least squares problem.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download CLALSD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clalsd.f">[TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clalsd.f">[ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clalsd.f">[TXT]</a>
21 * SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
22 * RANK, WORK, RWORK, IWORK, INFO )
24 * .. Scalar Arguments ..
26 * INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ
29 * .. Array Arguments ..
31 * REAL D( * ), E( * ), RWORK( * )
32 * COMPLEX B( LDB, * ), WORK( * )
41 *> CLALSD uses the singular value decomposition of A to solve the least
42 *> squares problem of finding X to minimize the Euclidean norm of each
43 *> column of A*X-B, where A is N-by-N bidiagonal (upper or lower, as specified by UPLO), and X and B
44 *> are N-by-NRHS. The solution X overwrites B.
46 *> The singular values of A less than or equal to RCOND times the largest
47 *> singular value are treated as zero in solving the least squares
48 *> problem; in this case a minimum norm solution is returned.
49 *> The actual singular values are returned in D in descending order.
51 *> This code makes very mild assumptions about floating point
52 *> arithmetic. It will work on machines with a guard digit in
53 *> add/subtract, or on those binary machines without guard digits
54 *> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
55 *> It could conceivably fail on hexadecimal or decimal machines
56 *> without guard digits, but we know of none.
64 *> UPLO is CHARACTER*1
65 *> = 'U': D and E define an upper bidiagonal matrix.
66 *> = 'L': D and E define a lower bidiagonal matrix.
72 *> The maximum size of the subproblems at the bottom of the computation tree.
79 *> The dimension of the bidiagonal matrix. N >= 0.
85 *> The number of columns of B. NRHS must be at least 1.
90 *> D is REAL array, dimension (N)
91 *> On entry D contains the main diagonal of the bidiagonal
92 *> matrix. On exit, if INFO = 0, D contains its singular values.
97 *> E is REAL array, dimension (N-1)
98 *> Contains the off-diagonal entries of the bidiagonal matrix (super-diagonal if UPLO = 'U', sub-diagonal if UPLO = 'L').
99 *> On exit, E has been destroyed.
104 *> B is COMPLEX array, dimension (LDB,NRHS)
105 *> On input, B contains the right hand sides of the least
106 *> squares problem. On output, B contains the solution X.
112 *> The leading dimension of B in the calling subprogram.
113 *> LDB must be at least max(1,N).
119 *> The singular values of A less than or equal to RCOND times
120 *> the largest singular value are treated as zero in solving
121 *> the least squares problem. If RCOND is negative,
122 *> machine precision is used instead.
123 *> For example, if diag(S)*X=B were the least squares problem,
124 *> where diag(S) is a diagonal matrix of singular values, the
125 *> solution would be X(i) = B(i) / S(i) if S(i) is greater than
126 *> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to RCOND*max(S).
133 *> The number of singular values of A greater than RCOND times
134 *> the largest singular value.
139 *> WORK is COMPLEX array, dimension (N * NRHS).
144 *> RWORK is REAL array, dimension at least
145 *> (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
146 *> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS )),
148 *> where NLVL = MAX( 0, INT( LOG_2( N/(SMLSIZ+1) ) ) + 1 )
153 *> IWORK is INTEGER array, dimension (3*N*NLVL + 11*N).
159 *> = 0: successful exit.
160 *> < 0: if INFO = -i, the i-th argument had an illegal value.
161 *> > 0: The algorithm failed to compute a singular value while
162 *> working on the submatrix lying in rows and columns
163 *> INFO/(N+1) through MOD(INFO,N+1).
169 *> \author Univ. of Tennessee
170 *> \author Univ. of California Berkeley
171 *> \author Univ. of Colorado Denver
174 *> \date September 2012
176 *> \ingroup complexOTHERcomputational
178 *> \par Contributors:
181 *> Ming Gu and Ren-Cang Li, Computer Science Division, University of
182 *> California at Berkeley, USA \n
183 *> Osni Marques, LBNL/NERSC, USA \n
185 * =====================================================================
186 SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
187 $ RANK, WORK, RWORK, IWORK, INFO )
189 * -- LAPACK computational routine (version 3.4.2) --
190 * -- LAPACK is a software package provided by Univ. of Tennessee, --
191 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
194 * .. Scalar Arguments ..
196 INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ
199 * .. Array Arguments ..
201 REAL D( * ), E( * ), RWORK( * )
202 COMPLEX B( LDB, * ), WORK( * )
205 * =====================================================================
209 PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
211 PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) )
213 * .. Local Scalars ..
214 INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
215 $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB,
216 $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG,
217 $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB,
218 $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1,
220 REAL CS, EPS, ORGNRM, R, RCND, SN, TOL
222 * .. External Functions ..
225 EXTERNAL ISAMAX, SLAMCH, SLANST
227 * .. External Subroutines ..
228 EXTERNAL CCOPY, CLACPY, CLALSA, CLASCL, CLASET, CSROT,
229 $ SGEMM, SLARTG, SLASCL, SLASDA, SLASDQ, SLASET,
232 * .. Intrinsic Functions ..
233 INTRINSIC ABS, AIMAG, CMPLX, INT, LOG, REAL, SIGN
235 * .. Executable Statements ..
237 * Test the input parameters.
243 ELSE IF( NRHS.LT.1 ) THEN
245 ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
249 CALL XERBLA( 'CLALSD', -INFO )
253 EPS = SLAMCH( 'Epsilon' )
255 * Set up the tolerance.
257 IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
265 * Quick return if possible.
269 ELSE IF( N.EQ.1 ) THEN
270 IF( D( 1 ).EQ.ZERO ) THEN
271 CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB )
274 CALL CLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
275 D( 1 ) = ABS( D( 1 ) )
280 * Rotate the matrix if it is lower bidiagonal.
282 IF( UPLO.EQ.'L' ) THEN
284 CALL SLARTG( D( I ), E( I ), CS, SN, R )
287 D( I+1 ) = CS*D( I+1 )
289 CALL CSROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
300 CALL CSROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
309 ORGNRM = SLANST( 'M', N, D, E )
310 IF( ORGNRM.EQ.ZERO ) THEN
311 CALL CLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB )
315 CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
316 CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
318 * If N does not exceed SMLSIZ, the maximum size of a subproblem that is
319 * solved directly, skip divide and conquer and solve the problem with SLASDQ.
321 IF( N.LE.SMLSIZ ) THEN
326 IRWIB = IRWRB + N*NRHS
327 IRWB = IRWIB + N*NRHS
328 CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N )
329 CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N )
330 CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N,
331 $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1,
332 $ RWORK( IRWWRK ), INFO )
337 * In the real version, B is passed to SLASDQ and multiplied
338 * internally by Q**H. Here B is complex and that product is
339 * computed below in two steps (real and imaginary parts).
345 RWORK( J ) = REAL( B( JROW, JCOL ) )
348 CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
349 $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
354 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
357 CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
358 $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
365 B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) )
369 TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) )
371 IF( D( I ).LE.TOL ) THEN
372 CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
374 CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
380 * Since B is complex, the following call to SGEMM is performed
381 * in two steps (real and imaginary parts). That is for V * B
382 * (in the real version of the code V**H is stored in WORK).
384 * CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
385 * $ WORK( NWORK ), N )
388 DO 120 JCOL = 1, NRHS
391 RWORK( J ) = REAL( B( JROW, JCOL ) )
394 CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
395 $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
397 DO 140 JCOL = 1, NRHS
400 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
403 CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
404 $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
407 DO 160 JCOL = 1, NRHS
411 B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) )
417 CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
418 CALL SLASRT( 'D', N, D, INFO )
419 CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
424 * Book-keeping and setting up some constants.
426 NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
438 GIVNUM = POLES + 2*NLVL*N
439 NRWORK = GIVNUM + 2*NLVL*N
443 IRWIB = IRWRB + SMLSIZ*NRHS
444 IRWB = IRWIB + SMLSIZ*NRHS
450 GIVCOL = PERM + NLVL*N
451 IWK = GIVCOL + NLVL*N*2
460 IF( ABS( D( I ) ).LT.EPS ) THEN
461 D( I ) = SIGN( EPS, D( I ) )
466 IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
470 * Subproblem found. First determine its size and then
471 * apply divide and conquer on it.
475 * A subproblem with E(I) small for I < NM1.
478 IWORK( SIZEI+NSUB-1 ) = NSIZE
479 ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
481 * A subproblem with E(NM1) not too small but I = NM1.
484 IWORK( SIZEI+NSUB-1 ) = NSIZE
487 * A subproblem with E(NM1) small. This implies a
488 * 1-by-1 subproblem at D(N), which is not solved explicitly.
492 IWORK( SIZEI+NSUB-1 ) = NSIZE
495 IWORK( SIZEI+NSUB-1 ) = 1
496 CALL CCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
499 IF( NSIZE.EQ.1 ) THEN
501 * This is a 1-by-1 subproblem and is not solved explicitly.
504 CALL CCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
505 ELSE IF( NSIZE.LE.SMLSIZ ) THEN
507 * This is a small subproblem and is solved by SLASDQ.
509 CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
510 $ RWORK( VT+ST1 ), N )
511 CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
512 $ RWORK( U+ST1 ), N )
513 CALL SLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ),
514 $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ),
515 $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ),
521 * In the real version, B is passed to SLASDQ and multiplied
522 * internally by Q**H. Here B is complex and that product is
523 * computed below in two steps (real and imaginary parts).
526 DO 190 JCOL = 1, NRHS
527 DO 180 JROW = ST, ST + NSIZE - 1
529 RWORK( J ) = REAL( B( JROW, JCOL ) )
532 CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
533 $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
534 $ ZERO, RWORK( IRWRB ), NSIZE )
536 DO 210 JCOL = 1, NRHS
537 DO 200 JROW = ST, ST + NSIZE - 1
539 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
542 CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
543 $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
544 $ ZERO, RWORK( IRWIB ), NSIZE )
547 DO 230 JCOL = 1, NRHS
548 DO 220 JROW = ST, ST + NSIZE - 1
551 B( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
556 CALL CLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
557 $ WORK( BX+ST1 ), N )
560 * A large problem. Solve it using divide and conquer.
562 CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
563 $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ),
564 $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ),
565 $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ),
566 $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
567 $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
568 $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ),
569 $ RWORK( S+ST1 ), RWORK( NRWORK ),
570 $ IWORK( IWK ), INFO )
575 CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
576 $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N,
577 $ RWORK( VT+ST1 ), IWORK( K+ST1 ),
578 $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
579 $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
580 $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
581 $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
582 $ RWORK( C+ST1 ), RWORK( S+ST1 ),
583 $ RWORK( NRWORK ), IWORK( IWK ), INFO )
592 * Apply the singular values and treat the tiny ones as zero.
594 TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) )
598 * Some of the elements in D can be negative because 1-by-1
599 * subproblems were not solved explicitly.
601 IF( ABS( D( I ) ).LE.TOL ) THEN
602 CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N )
605 CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
606 $ WORK( BX+I-1 ), N, INFO )
608 D( I ) = ABS( D( I ) )
611 * Now apply back the right singular vectors.
617 NSIZE = IWORK( SIZEI+I-1 )
619 IF( NSIZE.EQ.1 ) THEN
620 CALL CCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
621 ELSE IF( NSIZE.LE.SMLSIZ ) THEN
623 * Since B and BX are complex, the following call to SGEMM
624 * is performed in two steps (real and imaginary parts).
626 * CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
627 * $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
628 * $ B( ST, 1 ), LDB )
632 DO 270 JCOL = 1, NRHS
634 DO 260 JROW = 1, NSIZE
636 RWORK( JREAL ) = REAL( WORK( J+JROW ) )
639 CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
640 $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
641 $ RWORK( IRWRB ), NSIZE )
644 DO 290 JCOL = 1, NRHS
646 DO 280 JROW = 1, NSIZE
648 RWORK( JIMAG ) = AIMAG( WORK( J+JROW ) )
651 CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
652 $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
653 $ RWORK( IRWIB ), NSIZE )
656 DO 310 JCOL = 1, NRHS
657 DO 300 JROW = ST, ST + NSIZE - 1
660 B( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
665 CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
666 $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N,
667 $ RWORK( VT+ST1 ), IWORK( K+ST1 ),
668 $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
669 $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
670 $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
671 $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
672 $ RWORK( C+ST1 ), RWORK( S+ST1 ),
673 $ RWORK( NRWORK ), IWORK( IWK ), INFO )
680 * Unscale and sort the singular values.
682 CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
683 CALL SLASRT( 'D', N, D, INFO )
684 CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )