1 *> \brief \b ZLALSD uses the singular value decomposition of A to solve the least squares problem.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download ZLALSD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlalsd.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlalsd.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlalsd.f">
21 * SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
22 * RANK, WORK, RWORK, IWORK, INFO )
24 * .. Scalar Arguments ..
26 * INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ
27 * DOUBLE PRECISION RCOND
29 * .. Array Arguments ..
31 * DOUBLE PRECISION D( * ), E( * ), RWORK( * )
32 * COMPLEX*16 B( LDB, * ), WORK( * )
41 *> ZLALSD uses the singular value decomposition of A to solve the least
42 *> squares problem of finding X to minimize the Euclidean norm of each
43 *> column of A*X-B, where A is N-by-N upper or lower bidiagonal (see UPLO), and X and B
44 *> are N-by-NRHS. The solution X overwrites B.
46 *> The singular values of A smaller than RCOND times the largest
47 *> singular value are treated as zero in solving the least squares
48 *> problem; in this case a minimum norm solution is returned.
49 *> The actual singular values are returned in D in descending order.
51 *> This code makes very mild assumptions about floating point
52 *> arithmetic. It will work on machines with a guard digit in
53 *> add/subtract, or on those binary machines without guard digits
54 *> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
55 *> It could conceivably fail on hexadecimal or decimal machines
56 *> without guard digits, but we know of none.
64 *> UPLO is CHARACTER*1
65 *> = 'U': D and E define an upper bidiagonal matrix.
66 *> = 'L': D and E define a lower bidiagonal matrix.
72 *> The maximum size of the subproblems at the bottom of the computation tree.
79 *> The dimension of the bidiagonal matrix. N >= 0.
85 *> The number of columns of B. NRHS must be at least 1.
90 *> D is DOUBLE PRECISION array, dimension (N)
91 *> On entry D contains the main diagonal of the bidiagonal
92 *> matrix. On exit, if INFO = 0, D contains its singular values.
97 *> E is DOUBLE PRECISION array, dimension (N-1)
98 *> Contains the off-diagonal entries of the bidiagonal matrix: the super-diagonal if UPLO = 'U', the sub-diagonal if UPLO = 'L'.
99 *> On exit, E has been destroyed.
104 *> B is COMPLEX*16 array, dimension (LDB,NRHS)
105 *> On input, B contains the right hand sides of the least
106 *> squares problem. On output, B contains the solution X.
112 *> The leading dimension of B in the calling subprogram.
113 *> LDB must be at least max(1,N).
118 *> RCOND is DOUBLE PRECISION
119 *> The singular values of A less than or equal to RCOND times
120 *> the largest singular value are treated as zero in solving
121 *> the least squares problem. If RCOND <= 0 or RCOND >= 1,
122 *> machine precision is used instead.
123 *> For example, if diag(S)*X=B were the least squares problem,
124 *> where diag(S) is a diagonal matrix of singular values, the
125 *> solution would be X(i) = B(i) / S(i) if S(i) is greater than
126 *> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to RCOND*max(S).
133 *> The number of singular values of A greater than RCOND times
134 *> the largest singular value.
139 *> WORK is COMPLEX*16 array, dimension at least (N * NRHS).
145 *> RWORK is DOUBLE PRECISION array, dimension at least
146 *> (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
147 *> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS )), where
149 *> NLVL = MAX( 0, INT( LOG_2( N/(SMLSIZ+1) ) ) + 1 )
154 *> IWORK is INTEGER array, dimension at least
155 *> (3*N*NLVL + 11*N).
161 *> = 0: successful exit.
162 *> < 0: if INFO = -i, the i-th argument had an illegal value.
163 *> > 0: The algorithm failed to compute a singular value while
164 *> working on the submatrix lying in rows and columns
165 *> INFO/(N+1) through MOD(INFO,N+1).
171 *> \author Univ. of Tennessee
172 *> \author Univ. of California Berkeley
173 *> \author Univ. of Colorado Denver
176 *> \date September 2012
178 *> \ingroup complex16OTHERcomputational
180 *> \par Contributors:
183 *> Ming Gu and Ren-Cang Li, Computer Science Division, University of
184 *> California at Berkeley, USA \n
185 *> Osni Marques, LBNL/NERSC, USA \n
187 * =====================================================================
188 SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
189 $ RANK, WORK, RWORK, IWORK, INFO )
191 * -- LAPACK computational routine (version 3.4.2) --
192 * -- LAPACK is a software package provided by Univ. of Tennessee, --
193 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
196 * .. Scalar Arguments ..
198 INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ
199 DOUBLE PRECISION RCOND
201 * .. Array Arguments ..
203 DOUBLE PRECISION D( * ), E( * ), RWORK( * )
204 COMPLEX*16 B( LDB, * ), WORK( * )
207 * =====================================================================
210 DOUBLE PRECISION ZERO, ONE, TWO
211 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
213 PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) )
215 * .. Local Scalars ..
216 INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
217 $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB,
218 $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG,
219 $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB,
220 $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1,
222 DOUBLE PRECISION CS, EPS, ORGNRM, RCND, R, SN, TOL
224 * .. External Functions ..
226 DOUBLE PRECISION DLAMCH, DLANST
227 EXTERNAL IDAMAX, DLAMCH, DLANST
229 * .. External Subroutines ..
230 EXTERNAL DGEMM, DLARTG, DLASCL, DLASDA, DLASDQ, DLASET,
231 $ DLASRT, XERBLA, ZCOPY, ZDROT, ZLACPY, ZLALSA,
234 * .. Intrinsic Functions ..
235 INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, LOG, SIGN
237 * .. Executable Statements ..
239 * Test the input parameters.
245 ELSE IF( NRHS.LT.1 ) THEN
247 ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
251 CALL XERBLA( 'ZLALSD', -INFO )
255 EPS = DLAMCH( 'Epsilon' )
257 * Set up the tolerance.
259 IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
267 * Quick return if possible.
271 ELSE IF( N.EQ.1 ) THEN
272 IF( D( 1 ).EQ.ZERO ) THEN
273 CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB )
276 CALL ZLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
277 D( 1 ) = ABS( D( 1 ) )
282 * Rotate the matrix if it is lower bidiagonal.
284 IF( UPLO.EQ.'L' ) THEN
286 CALL DLARTG( D( I ), E( I ), CS, SN, R )
289 D( I+1 ) = CS*D( I+1 )
291 CALL ZDROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
302 CALL ZDROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
311 ORGNRM = DLANST( 'M', N, D, E )
312 IF( ORGNRM.EQ.ZERO ) THEN
313 CALL ZLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB )
317 CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
318 CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
320 * If N does not exceed SMLSIZ, the maximum bottom-level subproblem
321 * size, then solve the problem directly with DLASDQ.
323 IF( N.LE.SMLSIZ ) THEN
328 IRWIB = IRWRB + N*NRHS
329 IRWB = IRWIB + N*NRHS
330 CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N )
331 CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N )
332 CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N,
333 $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1,
334 $ RWORK( IRWWRK ), INFO )
339 * In the real version, B is passed to DLASDQ and multiplied
340 * internally by Q**H. Here B is complex and that product is
341 * computed below in two steps (real and imaginary parts).
347 RWORK( J ) = DBLE( B( JROW, JCOL ) )
350 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
351 $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
356 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
359 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
360 $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
367 B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
372 TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
374 IF( D( I ).LE.TOL ) THEN
375 CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
377 CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
383 * Since B is complex, the following call to DGEMM is performed
384 * in two steps (real and imaginary parts). That is for V * B
385 * (in the real version of the code V**H is stored in WORK).
387 * CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
388 * $ WORK( NWORK ), N )
391 DO 120 JCOL = 1, NRHS
394 RWORK( J ) = DBLE( B( JROW, JCOL ) )
397 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
398 $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
400 DO 140 JCOL = 1, NRHS
403 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
406 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
407 $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
410 DO 160 JCOL = 1, NRHS
414 B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
421 CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
422 CALL DLASRT( 'D', N, D, INFO )
423 CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
428 * Book-keeping and setting up some constants.
430 NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
442 GIVNUM = POLES + 2*NLVL*N
443 NRWORK = GIVNUM + 2*NLVL*N
447 IRWIB = IRWRB + SMLSIZ*NRHS
448 IRWB = IRWIB + SMLSIZ*NRHS
454 GIVCOL = PERM + NLVL*N
455 IWK = GIVCOL + NLVL*N*2
464 IF( ABS( D( I ) ).LT.EPS ) THEN
465 D( I ) = SIGN( EPS, D( I ) )
470 IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
474 * Subproblem found. First determine its size and then
475 * apply divide and conquer on it.
479 * A subproblem with E(I) small for I < NM1.
482 IWORK( SIZEI+NSUB-1 ) = NSIZE
483 ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
485 * A subproblem with E(NM1) not too small but I = NM1.
488 IWORK( SIZEI+NSUB-1 ) = NSIZE
491 * A subproblem with E(NM1) small. This implies a
492 * 1-by-1 subproblem at D(N), which is not solved
496 IWORK( SIZEI+NSUB-1 ) = NSIZE
499 IWORK( SIZEI+NSUB-1 ) = 1
500 CALL ZCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
503 IF( NSIZE.EQ.1 ) THEN
505 * This is a 1-by-1 subproblem and is not solved
508 CALL ZCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
509 ELSE IF( NSIZE.LE.SMLSIZ ) THEN
511 * This is a small subproblem and is solved by DLASDQ.
513 CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
514 $ RWORK( VT+ST1 ), N )
515 CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
516 $ RWORK( U+ST1 ), N )
517 CALL DLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ),
518 $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ),
519 $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ),
525 * In the real version, B is passed to DLASDQ and multiplied
526 * internally by Q**H. Here B is complex and that product is
527 * computed below in two steps (real and imaginary parts).
530 DO 190 JCOL = 1, NRHS
531 DO 180 JROW = ST, ST + NSIZE - 1
533 RWORK( J ) = DBLE( B( JROW, JCOL ) )
536 CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
537 $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
538 $ ZERO, RWORK( IRWRB ), NSIZE )
540 DO 210 JCOL = 1, NRHS
541 DO 200 JROW = ST, ST + NSIZE - 1
543 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
546 CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
547 $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
548 $ ZERO, RWORK( IRWIB ), NSIZE )
551 DO 230 JCOL = 1, NRHS
552 DO 220 JROW = ST, ST + NSIZE - 1
555 B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
560 CALL ZLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
561 $ WORK( BX+ST1 ), N )
564 * A large problem. Solve it using divide and conquer.
566 CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
567 $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ),
568 $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ),
569 $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ),
570 $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
571 $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
572 $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ),
573 $ RWORK( S+ST1 ), RWORK( NRWORK ),
574 $ IWORK( IWK ), INFO )
579 CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
580 $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N,
581 $ RWORK( VT+ST1 ), IWORK( K+ST1 ),
582 $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
583 $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
584 $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
585 $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
586 $ RWORK( C+ST1 ), RWORK( S+ST1 ),
587 $ RWORK( NRWORK ), IWORK( IWK ), INFO )
596 * Apply the singular values and treat the tiny ones as zero.
598 TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
602 * Some of the elements in D can be negative because 1-by-1
603 * subproblems were not solved explicitly.
605 IF( ABS( D( I ) ).LE.TOL ) THEN
606 CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N )
609 CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
610 $ WORK( BX+I-1 ), N, INFO )
612 D( I ) = ABS( D( I ) )
615 * Now apply back the right singular vectors.
621 NSIZE = IWORK( SIZEI+I-1 )
623 IF( NSIZE.EQ.1 ) THEN
624 CALL ZCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
625 ELSE IF( NSIZE.LE.SMLSIZ ) THEN
627 * Since B and BX are complex, the following call to DGEMM
628 * is performed in two steps (real and imaginary parts).
630 * CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
631 * $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
632 * $ B( ST, 1 ), LDB )
636 DO 270 JCOL = 1, NRHS
638 DO 260 JROW = 1, NSIZE
640 RWORK( JREAL ) = DBLE( WORK( J+JROW ) )
643 CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
644 $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
645 $ RWORK( IRWRB ), NSIZE )
648 DO 290 JCOL = 1, NRHS
650 DO 280 JROW = 1, NSIZE
652 RWORK( JIMAG ) = DIMAG( WORK( J+JROW ) )
655 CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
656 $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
657 $ RWORK( IRWIB ), NSIZE )
660 DO 310 JCOL = 1, NRHS
661 DO 300 JROW = ST, ST + NSIZE - 1
664 B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
669 CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
670 $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N,
671 $ RWORK( VT+ST1 ), IWORK( K+ST1 ),
672 $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
673 $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
674 $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
675 $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
676 $ RWORK( C+ST1 ), RWORK( S+ST1 ),
677 $ RWORK( NRWORK ), IWORK( IWK ), INFO )
684 * Unscale and sort the singular values.
686 CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
687 CALL DLASRT( 'D', N, D, INFO )
688 CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )