1 *> \brief <b> ZGESVD computes the singular value decomposition (SVD) for GE matrices</b>
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download ZGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvd.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvd.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvd.f">
21 * SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
28 * .. Array Arguments ..
29 * DOUBLE PRECISION RWORK( * ), S( * )
30 * COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
40 *> ZGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
44 *> A = U * SIGMA * conjugate-transpose(V)
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
53 *> Note that the routine returns V**H, not V.
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
85 *> JOBVT and JOBU cannot both be 'O'.
91 *> The number of rows of the input matrix A. M >= 0.
97 *> The number of columns of the input matrix A. N >= 0.
102 *> A is COMPLEX*16 array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
118 *> The leading dimension of the array A. LDA >= max(1,M).
123 *> S is DOUBLE PRECISION array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
129 *> U is COMPLEX*16 array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
146 *> VT is COMPLEX*16 array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
163 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
182 *> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if ZBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
211 *> \ingroup complex16GEsing
213 * =====================================================================
214 SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
215 $ VT, LDVT, WORK, LWORK, RWORK, INFO )
217 * -- LAPACK driver routine (version 3.6.1) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
222 * .. Scalar Arguments ..
223 CHARACTER JOBU, JOBVT
224 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
226 * .. Array Arguments ..
227 DOUBLE PRECISION RWORK( * ), S( * )
228 COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
232 * =====================================================================
235 COMPLEX*16 CZERO, CONE
236 PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ),
237 $ CONE = ( 1.0D0, 0.0D0 ) )
238 DOUBLE PRECISION ZERO, ONE
239 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
241 * .. Local Scalars ..
242 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
244 INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
246 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
248 INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M,
249 $ LWORK_ZGEBRD, LWORK_ZUNGBR_P, LWORK_ZUNGBR_Q,
250 $ LWORK_ZGELQF, LWORK_ZUNGLQ_N, LWORK_ZUNGLQ_M
251 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
254 DOUBLE PRECISION DUM( 1 )
257 * .. External Subroutines ..
258 EXTERNAL DLASCL, XERBLA, ZBDSQR, ZGEBRD, ZGELQF, ZGEMM,
259 $ ZGEQRF, ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNGLQ,
262 * .. External Functions ..
265 DOUBLE PRECISION DLAMCH, ZLANGE
266 EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE
268 * .. Intrinsic Functions ..
269 INTRINSIC MAX, MIN, SQRT
271 * .. Executable Statements ..
273 * Test the input arguments
277 WNTUA = LSAME( JOBU, 'A' )
278 WNTUS = LSAME( JOBU, 'S' )
279 WNTUAS = WNTUA .OR. WNTUS
280 WNTUO = LSAME( JOBU, 'O' )
281 WNTUN = LSAME( JOBU, 'N' )
282 WNTVA = LSAME( JOBVT, 'A' )
283 WNTVS = LSAME( JOBVT, 'S' )
284 WNTVAS = WNTVA .OR. WNTVS
285 WNTVO = LSAME( JOBVT, 'O' )
286 WNTVN = LSAME( JOBVT, 'N' )
287 LQUERY = ( LWORK.EQ.-1 )
289 IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
291 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
292 $ ( WNTVO .AND. WNTUO ) ) THEN
294 ELSE IF( M.LT.0 ) THEN
296 ELSE IF( N.LT.0 ) THEN
298 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
300 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
302 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
303 $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
318 IF( M.GE.N .AND. MINMN.GT.0 ) THEN
320 * Space needed for ZBDSQR is BDSPAC = 5*N
322 MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
323 * Compute space needed for ZGEQRF
324 CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
325 LWORK_ZGEQRF = INT( CDUM(1) )
326 * Compute space needed for ZUNGQR
327 CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
328 LWORK_ZUNGQR_N = INT( CDUM(1) )
329 CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
330 LWORK_ZUNGQR_M = INT( CDUM(1) )
331 * Compute space needed for ZGEBRD
332 CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
333 $ CDUM(1), CDUM(1), -1, IERR )
334 LWORK_ZGEBRD = INT( CDUM(1) )
335 * Compute space needed for ZUNGBR
336 CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
337 $ CDUM(1), -1, IERR )
338 LWORK_ZUNGBR_P = INT( CDUM(1) )
339 CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
340 $ CDUM(1), -1, IERR )
341 LWORK_ZUNGBR_Q = INT( CDUM(1) )
343 IF( M.GE.MNTHR ) THEN
346 * Path 1 (M much larger than N, JOBU='N')
348 MAXWRK = N + LWORK_ZGEQRF
349 MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZGEBRD )
350 IF( WNTVO .OR. WNTVAS )
351 $ MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P )
353 ELSE IF( WNTUO .AND. WNTVN ) THEN
355 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
357 WRKBL = N + LWORK_ZGEQRF
358 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
359 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
360 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
361 MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
363 ELSE IF( WNTUO .AND. WNTVAS ) THEN
365 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
368 WRKBL = N + LWORK_ZGEQRF
369 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
370 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
371 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
372 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
373 MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
375 ELSE IF( WNTUS .AND. WNTVN ) THEN
377 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
379 WRKBL = N + LWORK_ZGEQRF
380 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
381 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
382 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
385 ELSE IF( WNTUS .AND. WNTVO ) THEN
387 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
389 WRKBL = N + LWORK_ZGEQRF
390 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
391 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
392 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
393 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
394 MAXWRK = 2*N*N + WRKBL
396 ELSE IF( WNTUS .AND. WNTVAS ) THEN
398 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
401 WRKBL = N + LWORK_ZGEQRF
402 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
403 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
404 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
405 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
408 ELSE IF( WNTUA .AND. WNTVN ) THEN
410 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
412 WRKBL = N + LWORK_ZGEQRF
413 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
414 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
415 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
418 ELSE IF( WNTUA .AND. WNTVO ) THEN
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
422 WRKBL = N + LWORK_ZGEQRF
423 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
424 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
425 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
426 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
427 MAXWRK = 2*N*N + WRKBL
429 ELSE IF( WNTUA .AND. WNTVAS ) THEN
431 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
434 WRKBL = N + LWORK_ZGEQRF
435 WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
436 WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
437 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
438 WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
444 * Path 10 (M at least N, but not much larger)
446 CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
447 $ CDUM(1), CDUM(1), -1, IERR )
448 LWORK_ZGEBRD = INT( CDUM(1) )
449 MAXWRK = 2*N + LWORK_ZGEBRD
450 IF( WNTUS .OR. WNTUO ) THEN
451 CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
452 $ CDUM(1), -1, IERR )
453 LWORK_ZUNGBR_Q = INT( CDUM(1) )
454 MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
457 CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
458 $ CDUM(1), -1, IERR )
459 LWORK_ZUNGBR_Q = INT( CDUM(1) )
460 MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
462 IF( .NOT.WNTVN ) THEN
463 MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P )
467 ELSE IF( MINMN.GT.0 ) THEN
469 * Space needed for ZBDSQR is BDSPAC = 5*M
471 MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
472 * Compute space needed for ZGELQF
473 CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
474 LWORK_ZGELQF = INT( CDUM(1) )
475 * Compute space needed for ZUNGLQ
476 CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
478 LWORK_ZUNGLQ_N = INT( CDUM(1) )
479 CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
480 LWORK_ZUNGLQ_M = INT( CDUM(1) )
481 * Compute space needed for ZGEBRD
482 CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
483 $ CDUM(1), CDUM(1), -1, IERR )
484 LWORK_ZGEBRD = INT( CDUM(1) )
485 * Compute space needed for ZUNGBR P
486 CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1),
487 $ CDUM(1), -1, IERR )
488 LWORK_ZUNGBR_P = INT( CDUM(1) )
489 * Compute space needed for ZUNGBR Q
490 CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1),
491 $ CDUM(1), -1, IERR )
492 LWORK_ZUNGBR_Q = INT( CDUM(1) )
493 IF( N.GE.MNTHR ) THEN
496 * Path 1t(N much larger than M, JOBVT='N')
498 MAXWRK = M + LWORK_ZGELQF
499 MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZGEBRD )
500 IF( WNTUO .OR. WNTUAS )
501 $ MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q )
503 ELSE IF( WNTVO .AND. WNTUN ) THEN
505 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 WRKBL = M + LWORK_ZGELQF
508 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
509 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
510 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
511 MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
513 ELSE IF( WNTVO .AND. WNTUAS ) THEN
515 * Path 3t(N much larger than M, JOBU='S' or 'A',
518 WRKBL = M + LWORK_ZGELQF
519 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
520 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
521 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
522 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
523 MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
525 ELSE IF( WNTVS .AND. WNTUN ) THEN
527 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
529 WRKBL = M + LWORK_ZGELQF
530 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
531 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
532 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
535 ELSE IF( WNTVS .AND. WNTUO ) THEN
537 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
539 WRKBL = M + LWORK_ZGELQF
540 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
541 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
542 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
543 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
544 MAXWRK = 2*M*M + WRKBL
546 ELSE IF( WNTVS .AND. WNTUAS ) THEN
548 * Path 6t(N much larger than M, JOBU='S' or 'A',
551 WRKBL = M + LWORK_ZGELQF
552 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
553 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
554 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
555 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
558 ELSE IF( WNTVA .AND. WNTUN ) THEN
560 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
562 WRKBL = M + LWORK_ZGELQF
563 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
564 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
565 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
568 ELSE IF( WNTVA .AND. WNTUO ) THEN
570 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
572 WRKBL = M + LWORK_ZGELQF
573 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
574 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
575 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
576 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
577 MAXWRK = 2*M*M + WRKBL
579 ELSE IF( WNTVA .AND. WNTUAS ) THEN
581 * Path 9t(N much larger than M, JOBU='S' or 'A',
584 WRKBL = M + LWORK_ZGELQF
585 WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
586 WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
587 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
588 WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
594 * Path 10t(N greater than M, but not much larger)
596 CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
597 $ CDUM(1), CDUM(1), -1, IERR )
598 LWORK_ZGEBRD = INT( CDUM(1) )
599 MAXWRK = 2*M + LWORK_ZGEBRD
600 IF( WNTVS .OR. WNTVO ) THEN
601 * Compute space needed for ZUNGBR P
602 CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1),
603 $ CDUM(1), -1, IERR )
604 LWORK_ZUNGBR_P = INT( CDUM(1) )
605 MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
608 CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1),
609 $ CDUM(1), -1, IERR )
610 LWORK_ZUNGBR_P = INT( CDUM(1) )
611 MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
613 IF( .NOT.WNTUN ) THEN
614 MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q )
619 MAXWRK = MAX( MAXWRK, MINWRK )
622 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
628 CALL XERBLA( 'ZGESVD', -INFO )
630 ELSE IF( LQUERY ) THEN
634 * Quick return if possible
636 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
640 * Get machine constants
643 SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
644 BIGNUM = ONE / SMLNUM
646 * Scale A if max element outside range [SMLNUM,BIGNUM]
648 ANRM = ZLANGE( 'M', M, N, A, LDA, DUM )
650 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
652 CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
653 ELSE IF( ANRM.GT.BIGNUM ) THEN
655 CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
660 * A has at least as many rows as columns. If A has sufficiently
661 * more rows than columns, first reduce using the QR
662 * decomposition (if sufficient workspace available)
664 IF( M.GE.MNTHR ) THEN
668 * Path 1 (M much larger than N, JOBU='N')
669 * No left singular vectors to be computed
675 * (CWorkspace: need 2*N, prefer N+N*NB)
676 * (RWorkspace: need 0)
678 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
679 $ LWORK-IWORK+1, IERR )
684 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
692 * Bidiagonalize R in A
693 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
694 * (RWorkspace: need N)
696 CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
697 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
700 IF( WNTVO .OR. WNTVAS ) THEN
702 * If right singular vectors desired, generate P'.
703 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
706 CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
707 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
712 * Perform bidiagonal QR iteration, computing right
713 * singular vectors of A in A if desired
715 * (RWorkspace: need BDSPAC)
717 CALL ZBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
718 $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
720 * If right singular vectors desired in VT, copy them there
723 $ CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT )
725 ELSE IF( WNTUO .AND. WNTVN ) THEN
727 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
728 * N left singular vectors to be overwritten on A and
729 * no right singular vectors to be computed
731 IF( LWORK.GE.N*N+3*N ) THEN
733 * Sufficient workspace for a fast algorithm
736 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
738 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
742 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
744 * WORK(IU) is LDA by N, WORK(IR) is N by N
750 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
752 LDWRKU = ( LWORK-N*N ) / N
759 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
762 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
763 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
765 * Copy R to WORK(IR) and zero out below it
767 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
768 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
769 $ WORK( IR+1 ), LDWRKR )
772 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
775 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
776 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
782 * Bidiagonalize R in WORK(IR)
783 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
784 * (RWorkspace: need N)
786 CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
787 $ WORK( ITAUQ ), WORK( ITAUP ),
788 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
790 * Generate left vectors bidiagonalizing R
791 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
792 * (RWorkspace: need 0)
794 CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
795 $ WORK( ITAUQ ), WORK( IWORK ),
796 $ LWORK-IWORK+1, IERR )
799 * Perform bidiagonal QR iteration, computing left
800 * singular vectors of R in WORK(IR)
801 * (CWorkspace: need N*N)
802 * (RWorkspace: need BDSPAC)
804 CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
805 $ WORK( IR ), LDWRKR, CDUM, 1,
806 $ RWORK( IRWORK ), INFO )
809 * Multiply Q in A by left singular vectors of R in
810 * WORK(IR), storing result in WORK(IU) and copying to A
811 * (CWorkspace: need N*N+N, prefer N*N+M*N)
814 DO 10 I = 1, M, LDWRKU
815 CHUNK = MIN( M-I+1, LDWRKU )
816 CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
817 $ LDA, WORK( IR ), LDWRKR, CZERO,
818 $ WORK( IU ), LDWRKU )
819 CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
825 * Insufficient workspace for a fast algorithm
833 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
836 CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
837 $ WORK( ITAUQ ), WORK( ITAUP ),
838 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
840 * Generate left vectors bidiagonalizing A
841 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
844 CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
845 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
848 * Perform bidiagonal QR iteration, computing left
849 * singular vectors of A in A
850 * (CWorkspace: need 0)
851 * (RWorkspace: need BDSPAC)
853 CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
854 $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
858 ELSE IF( WNTUO .AND. WNTVAS ) THEN
860 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
861 * N left singular vectors to be overwritten on A and
862 * N right singular vectors to be computed in VT
864 IF( LWORK.GE.N*N+3*N ) THEN
866 * Sufficient workspace for a fast algorithm
869 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
871 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
875 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
877 * WORK(IU) is LDA by N and WORK(IR) is N by N
883 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
885 LDWRKU = ( LWORK-N*N ) / N
892 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
895 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
896 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
898 * Copy R to VT, zeroing out below it
900 CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
902 $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
906 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
909 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
910 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
916 * Bidiagonalize R in VT, copying result to WORK(IR)
917 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
918 * (RWorkspace: need N)
920 CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
921 $ WORK( ITAUQ ), WORK( ITAUP ),
922 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
923 CALL ZLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
925 * Generate left vectors bidiagonalizing R in WORK(IR)
926 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
929 CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
930 $ WORK( ITAUQ ), WORK( IWORK ),
931 $ LWORK-IWORK+1, IERR )
933 * Generate right vectors bidiagonalizing R in VT
934 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
937 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
938 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
941 * Perform bidiagonal QR iteration, computing left
942 * singular vectors of R in WORK(IR) and computing right
943 * singular vectors of R in VT
944 * (CWorkspace: need N*N)
945 * (RWorkspace: need BDSPAC)
947 CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
948 $ LDVT, WORK( IR ), LDWRKR, CDUM, 1,
949 $ RWORK( IRWORK ), INFO )
952 * Multiply Q in A by left singular vectors of R in
953 * WORK(IR), storing result in WORK(IU) and copying to A
954 * (CWorkspace: need N*N+N, prefer N*N+M*N)
957 DO 20 I = 1, M, LDWRKU
958 CHUNK = MIN( M-I+1, LDWRKU )
959 CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
960 $ LDA, WORK( IR ), LDWRKR, CZERO,
961 $ WORK( IU ), LDWRKU )
962 CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
968 * Insufficient workspace for a fast algorithm
974 * (CWorkspace: need 2*N, prefer N+N*NB)
977 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
978 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
980 * Copy R to VT, zeroing out below it
982 CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
984 $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
988 * (CWorkspace: need 2*N, prefer N+N*NB)
991 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
992 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
998 * Bidiagonalize R in VT
999 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1002 CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1003 $ WORK( ITAUQ ), WORK( ITAUP ),
1004 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1006 * Multiply Q in A by left vectors bidiagonalizing R
1007 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1010 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1011 $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
1012 $ LWORK-IWORK+1, IERR )
1014 * Generate right vectors bidiagonalizing R in VT
1015 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1018 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1019 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1022 * Perform bidiagonal QR iteration, computing left
1023 * singular vectors of A in A and computing right
1024 * singular vectors of A in VT
1026 * (RWorkspace: need BDSPAC)
1028 CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1029 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
1034 ELSE IF( WNTUS ) THEN
1038 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1039 * N left singular vectors to be computed in U and
1040 * no right singular vectors to be computed
1042 IF( LWORK.GE.N*N+3*N ) THEN
1044 * Sufficient workspace for a fast algorithm
1047 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1049 * WORK(IR) is LDA by N
1054 * WORK(IR) is N by N
1058 ITAU = IR + LDWRKR*N
1062 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1065 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1066 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1068 * Copy R to WORK(IR), zeroing out below it
1070 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
1072 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1073 $ WORK( IR+1 ), LDWRKR )
1076 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1079 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1080 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1086 * Bidiagonalize R in WORK(IR)
1087 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1088 * (RWorkspace: need N)
1090 CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
1091 $ RWORK( IE ), WORK( ITAUQ ),
1092 $ WORK( ITAUP ), WORK( IWORK ),
1093 $ LWORK-IWORK+1, IERR )
1095 * Generate left vectors bidiagonalizing R in WORK(IR)
1096 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1099 CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1100 $ WORK( ITAUQ ), WORK( IWORK ),
1101 $ LWORK-IWORK+1, IERR )
1104 * Perform bidiagonal QR iteration, computing left
1105 * singular vectors of R in WORK(IR)
1106 * (CWorkspace: need N*N)
1107 * (RWorkspace: need BDSPAC)
1109 CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1110 $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1111 $ RWORK( IRWORK ), INFO )
1113 * Multiply Q in A by left singular vectors of R in
1114 * WORK(IR), storing result in U
1115 * (CWorkspace: need N*N)
1118 CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1119 $ WORK( IR ), LDWRKR, CZERO, U, LDU )
1123 * Insufficient workspace for a fast algorithm
1128 * Compute A=Q*R, copying result to U
1129 * (CWorkspace: need 2*N, prefer N+N*NB)
1132 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1133 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1134 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1137 * (CWorkspace: need 2*N, prefer N+N*NB)
1140 CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1141 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1147 * Zero out below R in A
1150 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1154 * Bidiagonalize R in A
1155 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1156 * (RWorkspace: need N)
1158 CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1159 $ WORK( ITAUQ ), WORK( ITAUP ),
1160 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1162 * Multiply Q in U by left vectors bidiagonalizing R
1163 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1166 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1167 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1168 $ LWORK-IWORK+1, IERR )
1171 * Perform bidiagonal QR iteration, computing left
1172 * singular vectors of A in U
1174 * (RWorkspace: need BDSPAC)
1176 CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1177 $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1182 ELSE IF( WNTVO ) THEN
1184 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1185 * N left singular vectors to be computed in U and
1186 * N right singular vectors to be overwritten on A
1188 IF( LWORK.GE.2*N*N+3*N ) THEN
1190 * Sufficient workspace for a fast algorithm
1193 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1195 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1200 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1202 * WORK(IU) is LDA by N and WORK(IR) is N by N
1209 * WORK(IU) is N by N and WORK(IR) is N by N
1215 ITAU = IR + LDWRKR*N
1219 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1222 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1223 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1225 * Copy R to WORK(IU), zeroing out below it
1227 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1229 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1230 $ WORK( IU+1 ), LDWRKU )
1233 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1236 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1237 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1243 * Bidiagonalize R in WORK(IU), copying result to
1245 * (CWorkspace: need 2*N*N+3*N,
1246 * prefer 2*N*N+2*N+2*N*NB)
1247 * (RWorkspace: need N)
1249 CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1250 $ RWORK( IE ), WORK( ITAUQ ),
1251 $ WORK( ITAUP ), WORK( IWORK ),
1252 $ LWORK-IWORK+1, IERR )
1253 CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1254 $ WORK( IR ), LDWRKR )
1256 * Generate left bidiagonalizing vectors in WORK(IU)
1257 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1260 CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1261 $ WORK( ITAUQ ), WORK( IWORK ),
1262 $ LWORK-IWORK+1, IERR )
1264 * Generate right bidiagonalizing vectors in WORK(IR)
1265 * (CWorkspace: need 2*N*N+3*N-1,
1266 * prefer 2*N*N+2*N+(N-1)*NB)
1269 CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1270 $ WORK( ITAUP ), WORK( IWORK ),
1271 $ LWORK-IWORK+1, IERR )
1274 * Perform bidiagonal QR iteration, computing left
1275 * singular vectors of R in WORK(IU) and computing
1276 * right singular vectors of R in WORK(IR)
1277 * (CWorkspace: need 2*N*N)
1278 * (RWorkspace: need BDSPAC)
1280 CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1281 $ WORK( IR ), LDWRKR, WORK( IU ),
1282 $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1285 * Multiply Q in A by left singular vectors of R in
1286 * WORK(IU), storing result in U
1287 * (CWorkspace: need N*N)
1290 CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1291 $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1293 * Copy right singular vectors of R to A
1294 * (CWorkspace: need N*N)
1297 CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1302 * Insufficient workspace for a fast algorithm
1307 * Compute A=Q*R, copying result to U
1308 * (CWorkspace: need 2*N, prefer N+N*NB)
1311 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1312 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1313 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1316 * (CWorkspace: need 2*N, prefer N+N*NB)
1319 CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1320 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1326 * Zero out below R in A
1329 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1333 * Bidiagonalize R in A
1334 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1335 * (RWorkspace: need N)
1337 CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1338 $ WORK( ITAUQ ), WORK( ITAUP ),
1339 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1341 * Multiply Q in U by left vectors bidiagonalizing R
1342 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1345 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1346 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1347 $ LWORK-IWORK+1, IERR )
1349 * Generate right vectors bidiagonalizing R in A
1350 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1353 CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1354 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1357 * Perform bidiagonal QR iteration, computing left
1358 * singular vectors of A in U and computing right
1359 * singular vectors of A in A
1361 * (RWorkspace: need BDSPAC)
1363 CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1364 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1369 ELSE IF( WNTVAS ) THEN
1371 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1373 * N left singular vectors to be computed in U and
1374 * N right singular vectors to be computed in VT
1376 IF( LWORK.GE.N*N+3*N ) THEN
1378 * Sufficient workspace for a fast algorithm
1381 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1383 * WORK(IU) is LDA by N
1388 * WORK(IU) is N by N
1392 ITAU = IU + LDWRKU*N
1396 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1399 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1400 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1402 * Copy R to WORK(IU), zeroing out below it
1404 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1406 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1407 $ WORK( IU+1 ), LDWRKU )
1410 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1413 CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1414 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1420 * Bidiagonalize R in WORK(IU), copying result to VT
1421 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1422 * (RWorkspace: need N)
1424 CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1425 $ RWORK( IE ), WORK( ITAUQ ),
1426 $ WORK( ITAUP ), WORK( IWORK ),
1427 $ LWORK-IWORK+1, IERR )
1428 CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1431 * Generate left bidiagonalizing vectors in WORK(IU)
1432 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1435 CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1436 $ WORK( ITAUQ ), WORK( IWORK ),
1437 $ LWORK-IWORK+1, IERR )
1439 * Generate right bidiagonalizing vectors in VT
1440 * (CWorkspace: need N*N+3*N-1,
1441 * prefer N*N+2*N+(N-1)*NB)
1444 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1445 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1448 * Perform bidiagonal QR iteration, computing left
1449 * singular vectors of R in WORK(IU) and computing
1450 * right singular vectors of R in VT
1451 * (CWorkspace: need N*N)
1452 * (RWorkspace: need BDSPAC)
1454 CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1455 $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1456 $ RWORK( IRWORK ), INFO )
1458 * Multiply Q in A by left singular vectors of R in
1459 * WORK(IU), storing result in U
1460 * (CWorkspace: need N*N)
1463 CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1464 $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1468 * Insufficient workspace for a fast algorithm
1473 * Compute A=Q*R, copying result to U
1474 * (CWorkspace: need 2*N, prefer N+N*NB)
1477 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1478 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1479 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1482 * (CWorkspace: need 2*N, prefer N+N*NB)
1485 CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1486 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1488 * Copy R to VT, zeroing out below it
1490 CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
1492 $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1493 $ VT( 2, 1 ), LDVT )
1499 * Bidiagonalize R in VT
1500 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1501 * (RWorkspace: need N)
1503 CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1504 $ WORK( ITAUQ ), WORK( ITAUP ),
1505 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1507 * Multiply Q in U by left bidiagonalizing vectors
1509 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1512 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1513 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1514 $ LWORK-IWORK+1, IERR )
1516 * Generate right bidiagonalizing vectors in VT
1517 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1520 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1521 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1524 * Perform bidiagonal QR iteration, computing left
1525 * singular vectors of A in U and computing right
1526 * singular vectors of A in VT
1528 * (RWorkspace: need BDSPAC)
1530 CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1531 $ LDVT, U, LDU, CDUM, 1,
1532 $ RWORK( IRWORK ), INFO )
1538 ELSE IF( WNTUA ) THEN
1542 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1543 * M left singular vectors to be computed in U and
1544 * no right singular vectors to be computed
1546 IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1548 * Sufficient workspace for a fast algorithm
1551 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1553 * WORK(IR) is LDA by N
1558 * WORK(IR) is N by N
1562 ITAU = IR + LDWRKR*N
1565 * Compute A=Q*R, copying result to U
1566 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1569 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1570 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1571 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1573 * Copy R to WORK(IR), zeroing out below it
1575 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
1577 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1578 $ WORK( IR+1 ), LDWRKR )
1581 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1584 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1585 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1591 * Bidiagonalize R in WORK(IR)
1592 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1593 * (RWorkspace: need N)
1595 CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
1596 $ RWORK( IE ), WORK( ITAUQ ),
1597 $ WORK( ITAUP ), WORK( IWORK ),
1598 $ LWORK-IWORK+1, IERR )
1600 * Generate left bidiagonalizing vectors in WORK(IR)
1601 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1604 CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1605 $ WORK( ITAUQ ), WORK( IWORK ),
1606 $ LWORK-IWORK+1, IERR )
1609 * Perform bidiagonal QR iteration, computing left
1610 * singular vectors of R in WORK(IR)
1611 * (CWorkspace: need N*N)
1612 * (RWorkspace: need BDSPAC)
1614 CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1615 $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1616 $ RWORK( IRWORK ), INFO )
1618 * Multiply Q in U by left singular vectors of R in
1619 * WORK(IR), storing result in A
1620 * (CWorkspace: need N*N)
1623 CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1624 $ WORK( IR ), LDWRKR, CZERO, A, LDA )
1626 * Copy left singular vectors of A from A to U
1628 CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1632 * Insufficient workspace for a fast algorithm
1637 * Compute A=Q*R, copying result to U
1638 * (CWorkspace: need 2*N, prefer N+N*NB)
1641 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1642 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1643 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1646 * (CWorkspace: need N+M, prefer N+M*NB)
1649 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1650 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1656 * Zero out below R in A
1659 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1663 * Bidiagonalize R in A
1664 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1665 * (RWorkspace: need N)
1667 CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1668 $ WORK( ITAUQ ), WORK( ITAUP ),
1669 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1671 * Multiply Q in U by left bidiagonalizing vectors
1673 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1676 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1677 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1678 $ LWORK-IWORK+1, IERR )
1681 * Perform bidiagonal QR iteration, computing left
1682 * singular vectors of A in U
1684 * (RWorkspace: need BDSPAC)
1686 CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1687 $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1692 ELSE IF( WNTVO ) THEN
1694 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1695 * M left singular vectors to be computed in U and
1696 * N right singular vectors to be overwritten on A
1698 IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
1700 * Sufficient workspace for a fast algorithm
1703 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1705 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1710 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1712 * WORK(IU) is LDA by N and WORK(IR) is N by N
1719 * WORK(IU) is N by N and WORK(IR) is N by N
1725 ITAU = IR + LDWRKR*N
1728 * Compute A=Q*R, copying result to U
1729 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1732 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1733 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1734 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1737 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1740 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1741 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1743 * Copy R to WORK(IU), zeroing out below it
1745 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1747 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1748 $ WORK( IU+1 ), LDWRKU )
1754 * Bidiagonalize R in WORK(IU), copying result to
1756 * (CWorkspace: need 2*N*N+3*N,
1757 * prefer 2*N*N+2*N+2*N*NB)
1758 * (RWorkspace: need N)
1760 CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1761 $ RWORK( IE ), WORK( ITAUQ ),
1762 $ WORK( ITAUP ), WORK( IWORK ),
1763 $ LWORK-IWORK+1, IERR )
1764 CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1765 $ WORK( IR ), LDWRKR )
1767 * Generate left bidiagonalizing vectors in WORK(IU)
1768 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1771 CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1772 $ WORK( ITAUQ ), WORK( IWORK ),
1773 $ LWORK-IWORK+1, IERR )
1775 * Generate right bidiagonalizing vectors in WORK(IR)
1776 * (CWorkspace: need 2*N*N+3*N-1,
1777 * prefer 2*N*N+2*N+(N-1)*NB)
1780 CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1781 $ WORK( ITAUP ), WORK( IWORK ),
1782 $ LWORK-IWORK+1, IERR )
1785 * Perform bidiagonal QR iteration, computing left
1786 * singular vectors of R in WORK(IU) and computing
1787 * right singular vectors of R in WORK(IR)
1788 * (CWorkspace: need 2*N*N)
1789 * (RWorkspace: need BDSPAC)
1791 CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1792 $ WORK( IR ), LDWRKR, WORK( IU ),
1793 $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1796 * Multiply Q in U by left singular vectors of R in
1797 * WORK(IU), storing result in A
1798 * (CWorkspace: need N*N)
1801 CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1802 $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1804 * Copy left singular vectors of A from A to U
1806 CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1808 * Copy right singular vectors of R from WORK(IR) to A
1810 CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1815 * Insufficient workspace for a fast algorithm
1820 * Compute A=Q*R, copying result to U
1821 * (CWorkspace: need 2*N, prefer N+N*NB)
1824 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1825 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1826 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1829 * (CWorkspace: need N+M, prefer N+M*NB)
1832 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1833 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1839 * Zero out below R in A
1842 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1846 * Bidiagonalize R in A
1847 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1848 * (RWorkspace: need N)
1850 CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1851 $ WORK( ITAUQ ), WORK( ITAUP ),
1852 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1854 * Multiply Q in U by left bidiagonalizing vectors
1856 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1859 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1860 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1861 $ LWORK-IWORK+1, IERR )
1863 * Generate right bidiagonalizing vectors in A
1864 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1867 CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1868 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1871 * Perform bidiagonal QR iteration, computing left
1872 * singular vectors of A in U and computing right
1873 * singular vectors of A in A
1875 * (RWorkspace: need BDSPAC)
1877 CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1878 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1883 ELSE IF( WNTVAS ) THEN
1885 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1887 * M left singular vectors to be computed in U and
1888 * N right singular vectors to be computed in VT
1890 IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1892 * Sufficient workspace for a fast algorithm
1895 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1897 * WORK(IU) is LDA by N
1902 * WORK(IU) is N by N
1906 ITAU = IU + LDWRKU*N
1909 * Compute A=Q*R, copying result to U
1910 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1913 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1914 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1915 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1918 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1921 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1922 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1924 * Copy R to WORK(IU), zeroing out below it
1926 CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1928 CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1929 $ WORK( IU+1 ), LDWRKU )
1935 * Bidiagonalize R in WORK(IU), copying result to VT
1936 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1937 * (RWorkspace: need N)
1939 CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1940 $ RWORK( IE ), WORK( ITAUQ ),
1941 $ WORK( ITAUP ), WORK( IWORK ),
1942 $ LWORK-IWORK+1, IERR )
1943 CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1946 * Generate left bidiagonalizing vectors in WORK(IU)
1947 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1950 CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1951 $ WORK( ITAUQ ), WORK( IWORK ),
1952 $ LWORK-IWORK+1, IERR )
1954 * Generate right bidiagonalizing vectors in VT
1955 * (CWorkspace: need N*N+3*N-1,
1956 * prefer N*N+2*N+(N-1)*NB)
1957 * (RWorkspace: need 0)
1959 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1960 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1963 * Perform bidiagonal QR iteration, computing left
1964 * singular vectors of R in WORK(IU) and computing
1965 * right singular vectors of R in VT
1966 * (CWorkspace: need N*N)
1967 * (RWorkspace: need BDSPAC)
1969 CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1970 $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1971 $ RWORK( IRWORK ), INFO )
1973 * Multiply Q in U by left singular vectors of R in
1974 * WORK(IU), storing result in A
1975 * (CWorkspace: need N*N)
1978 CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1979 $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1981 * Copy left singular vectors of A from A to U
1983 CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1987 * Insufficient workspace for a fast algorithm
1992 * Compute A=Q*R, copying result to U
1993 * (CWorkspace: need 2*N, prefer N+N*NB)
1996 CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1997 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1998 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
2001 * (CWorkspace: need N+M, prefer N+M*NB)
2004 CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
2005 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2007 * Copy R from A to VT, zeroing out below it
2009 CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
2011 $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
2012 $ VT( 2, 1 ), LDVT )
2018 * Bidiagonalize R in VT
2019 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2020 * (RWorkspace: need N)
2022 CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
2023 $ WORK( ITAUQ ), WORK( ITAUP ),
2024 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2026 * Multiply Q in U by left bidiagonalizing vectors
2028 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2031 CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
2032 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
2033 $ LWORK-IWORK+1, IERR )
2035 * Generate right bidiagonalizing vectors in VT
2036 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2039 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2040 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2043 * Perform bidiagonal QR iteration, computing left
2044 * singular vectors of A in U and computing right
2045 * singular vectors of A in VT
2047 * (RWorkspace: need BDSPAC)
2049 CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
2050 $ LDVT, U, LDU, CDUM, 1,
2051 $ RWORK( IRWORK ), INFO )
2063 * Path 10 (M at least N, but not much larger)
2064 * Reduce to bidiagonal form without QR decomposition
2072 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2073 * (RWorkspace: need N)
2075 CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2076 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2080 * If left singular vectors desired in U, copy result to U
2081 * and generate left bidiagonalizing vectors in U
2082 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2085 CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
2090 CALL ZUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
2091 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2095 * If right singular vectors desired in VT, copy result to
2096 * VT and generate right bidiagonalizing vectors in VT
2097 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2100 CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
2101 CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2102 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2106 * If left singular vectors desired in A, generate left
2107 * bidiagonalizing vectors in A
2108 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2111 CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2112 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2116 * If right singular vectors desired in A, generate right
2117 * bidiagonalizing vectors in A
2118 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2121 CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2122 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2125 IF( WNTUAS .OR. WNTUO )
2129 IF( WNTVAS .OR. WNTVO )
2133 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2135 * Perform bidiagonal QR iteration, if desired, computing
2136 * left singular vectors in U and computing right singular
2139 * (RWorkspace: need BDSPAC)
2141 CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2142 $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
2144 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2146 * Perform bidiagonal QR iteration, if desired, computing
2147 * left singular vectors in U and computing right singular
2150 * (RWorkspace: need BDSPAC)
2152 CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
2153 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
2157 * Perform bidiagonal QR iteration, if desired, computing
2158 * left singular vectors in A and computing right singular
2161 * (RWorkspace: need BDSPAC)
2163 CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2164 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
2172 * A has more columns than rows. If A has sufficiently more
2173 * columns than rows, first reduce using the LQ decomposition (if
2174 * sufficient workspace available)
2176 IF( N.GE.MNTHR ) THEN
2180 * Path 1t(N much larger than M, JOBVT='N')
2181 * No right singular vectors to be computed
2187 * (CWorkspace: need 2*M, prefer M+M*NB)
2190 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2191 $ LWORK-IWORK+1, IERR )
2195 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
2202 * Bidiagonalize L in A
2203 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2204 * (RWorkspace: need M)
2206 CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2207 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2209 IF( WNTUO .OR. WNTUAS ) THEN
2211 * If left singular vectors desired, generate Q
2212 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2215 CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2216 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2220 IF( WNTUO .OR. WNTUAS )
2223 * Perform bidiagonal QR iteration, computing left singular
2224 * vectors of A in A if desired
2226 * (RWorkspace: need BDSPAC)
2228 CALL ZBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
2229 $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
2231 * If left singular vectors desired in U, copy them there
2234 $ CALL ZLACPY( 'F', M, M, A, LDA, U, LDU )
2236 ELSE IF( WNTVO .AND. WNTUN ) THEN
2238 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2239 * M right singular vectors to be overwritten on A and
2240 * no left singular vectors to be computed
2242 IF( LWORK.GE.M*M+3*M ) THEN
2244 * Sufficient workspace for a fast algorithm
2247 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2249 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2254 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2256 * WORK(IU) is LDA by N and WORK(IR) is M by M
2263 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2266 CHUNK = ( LWORK-M*M ) / M
2269 ITAU = IR + LDWRKR*M
2273 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2276 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2277 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2279 * Copy L to WORK(IR) and zero out above it
2281 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2282 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2283 $ WORK( IR+LDWRKR ), LDWRKR )
2286 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2289 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2290 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2296 * Bidiagonalize L in WORK(IR)
2297 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2298 * (RWorkspace: need M)
2300 CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
2301 $ WORK( ITAUQ ), WORK( ITAUP ),
2302 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2304 * Generate right vectors bidiagonalizing L
2305 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2308 CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2309 $ WORK( ITAUP ), WORK( IWORK ),
2310 $ LWORK-IWORK+1, IERR )
2313 * Perform bidiagonal QR iteration, computing right
2314 * singular vectors of L in WORK(IR)
2315 * (CWorkspace: need M*M)
2316 * (RWorkspace: need BDSPAC)
2318 CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2319 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2320 $ RWORK( IRWORK ), INFO )
2323 * Multiply right singular vectors of L in WORK(IR) by Q
2324 * in A, storing result in WORK(IU) and copying to A
2325 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2328 DO 30 I = 1, N, CHUNK
2329 BLK = MIN( N-I+1, CHUNK )
2330 CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2331 $ LDWRKR, A( 1, I ), LDA, CZERO,
2332 $ WORK( IU ), LDWRKU )
2333 CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2339 * Insufficient workspace for a fast algorithm
2347 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2348 * (RWorkspace: need M)
2350 CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
2351 $ WORK( ITAUQ ), WORK( ITAUP ),
2352 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2354 * Generate right vectors bidiagonalizing A
2355 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2358 CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2359 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2362 * Perform bidiagonal QR iteration, computing right
2363 * singular vectors of A in A
2365 * (RWorkspace: need BDSPAC)
2367 CALL ZBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
2368 $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
2372 ELSE IF( WNTVO .AND. WNTUAS ) THEN
2374 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2375 * M right singular vectors to be overwritten on A and
2376 * M left singular vectors to be computed in U
2378 IF( LWORK.GE.M*M+3*M ) THEN
2380 * Sufficient workspace for a fast algorithm
2383 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2385 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2390 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2392 * WORK(IU) is LDA by N and WORK(IR) is M by M
2399 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2402 CHUNK = ( LWORK-M*M ) / M
2405 ITAU = IR + LDWRKR*M
2409 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2412 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2413 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2415 * Copy L to U, zeroing about above it
2417 CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
2418 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2422 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2425 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2426 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2432 * Bidiagonalize L in U, copying result to WORK(IR)
2433 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2434 * (RWorkspace: need M)
2436 CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
2437 $ WORK( ITAUQ ), WORK( ITAUP ),
2438 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2439 CALL ZLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2441 * Generate right vectors bidiagonalizing L in WORK(IR)
2442 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2445 CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2446 $ WORK( ITAUP ), WORK( IWORK ),
2447 $ LWORK-IWORK+1, IERR )
2449 * Generate left vectors bidiagonalizing L in U
2450 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2453 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2454 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2457 * Perform bidiagonal QR iteration, computing left
2458 * singular vectors of L in U, and computing right
2459 * singular vectors of L in WORK(IR)
2460 * (CWorkspace: need M*M)
2461 * (RWorkspace: need BDSPAC)
2463 CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2464 $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
2465 $ RWORK( IRWORK ), INFO )
2468 * Multiply right singular vectors of L in WORK(IR) by Q
2469 * in A, storing result in WORK(IU) and copying to A
2470 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2473 DO 40 I = 1, N, CHUNK
2474 BLK = MIN( N-I+1, CHUNK )
2475 CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2476 $ LDWRKR, A( 1, I ), LDA, CZERO,
2477 $ WORK( IU ), LDWRKU )
2478 CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2484 * Insufficient workspace for a fast algorithm
2490 * (CWorkspace: need 2*M, prefer M+M*NB)
2493 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2494 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2496 * Copy L to U, zeroing out above it
2498 CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
2499 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2503 * (CWorkspace: need 2*M, prefer M+M*NB)
2506 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2507 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2513 * Bidiagonalize L in U
2514 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2515 * (RWorkspace: need M)
2517 CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
2518 $ WORK( ITAUQ ), WORK( ITAUP ),
2519 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2521 * Multiply right vectors bidiagonalizing L by Q in A
2522 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2525 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2526 $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2527 $ LWORK-IWORK+1, IERR )
2529 * Generate left vectors bidiagonalizing L in U
2530 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2533 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2534 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2537 * Perform bidiagonal QR iteration, computing left
2538 * singular vectors of A in U and computing right
2539 * singular vectors of A in A
2541 * (RWorkspace: need BDSPAC)
2543 CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
2544 $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
2548 ELSE IF( WNTVS ) THEN
2552 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2553 * M right singular vectors to be computed in VT and
2554 * no left singular vectors to be computed
2556 IF( LWORK.GE.M*M+3*M ) THEN
2558 * Sufficient workspace for a fast algorithm
2561 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2563 * WORK(IR) is LDA by M
2568 * WORK(IR) is M by M
2572 ITAU = IR + LDWRKR*M
2576 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2579 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2580 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2582 * Copy L to WORK(IR), zeroing out above it
2584 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
2586 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2587 $ WORK( IR+LDWRKR ), LDWRKR )
2590 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2593 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2594 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2600 * Bidiagonalize L in WORK(IR)
2601 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2602 * (RWorkspace: need M)
2604 CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
2605 $ RWORK( IE ), WORK( ITAUQ ),
2606 $ WORK( ITAUP ), WORK( IWORK ),
2607 $ LWORK-IWORK+1, IERR )
2609 * Generate right vectors bidiagonalizing L in
2611 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2614 CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2615 $ WORK( ITAUP ), WORK( IWORK ),
2616 $ LWORK-IWORK+1, IERR )
2619 * Perform bidiagonal QR iteration, computing right
2620 * singular vectors of L in WORK(IR)
2621 * (CWorkspace: need M*M)
2622 * (RWorkspace: need BDSPAC)
2624 CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2625 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2626 $ RWORK( IRWORK ), INFO )
2628 * Multiply right singular vectors of L in WORK(IR) by
2629 * Q in A, storing result in VT
2630 * (CWorkspace: need M*M)
2633 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
2634 $ LDWRKR, A, LDA, CZERO, VT, LDVT )
2638 * Insufficient workspace for a fast algorithm
2644 * (CWorkspace: need 2*M, prefer M+M*NB)
2647 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2648 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2652 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2655 * (CWorkspace: need 2*M, prefer M+M*NB)
2658 CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2659 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2665 * Zero out above L in A
2667 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2670 * Bidiagonalize L in A
2671 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2672 * (RWorkspace: need M)
2674 CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
2675 $ WORK( ITAUQ ), WORK( ITAUP ),
2676 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2678 * Multiply right vectors bidiagonalizing L by Q in VT
2679 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2682 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2683 $ WORK( ITAUP ), VT, LDVT,
2684 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2687 * Perform bidiagonal QR iteration, computing right
2688 * singular vectors of A in VT
2690 * (RWorkspace: need BDSPAC)
2692 CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
2693 $ LDVT, CDUM, 1, CDUM, 1,
2694 $ RWORK( IRWORK ), INFO )
2698 ELSE IF( WNTUO ) THEN
2700 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2701 * M right singular vectors to be computed in VT and
2702 * M left singular vectors to be overwritten on A
2704 IF( LWORK.GE.2*M*M+3*M ) THEN
2706 * Sufficient workspace for a fast algorithm
2709 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2711 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2716 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2718 * WORK(IU) is LDA by M and WORK(IR) is M by M
2725 * WORK(IU) is M by M and WORK(IR) is M by M
2731 ITAU = IR + LDWRKR*M
2735 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2738 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2739 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2741 * Copy L to WORK(IU), zeroing out below it
2743 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
2745 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2746 $ WORK( IU+LDWRKU ), LDWRKU )
2749 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2752 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2753 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2759 * Bidiagonalize L in WORK(IU), copying result to
2761 * (CWorkspace: need 2*M*M+3*M,
2762 * prefer 2*M*M+2*M+2*M*NB)
2763 * (RWorkspace: need M)
2765 CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
2766 $ RWORK( IE ), WORK( ITAUQ ),
2767 $ WORK( ITAUP ), WORK( IWORK ),
2768 $ LWORK-IWORK+1, IERR )
2769 CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2770 $ WORK( IR ), LDWRKR )
2772 * Generate right bidiagonalizing vectors in WORK(IU)
2773 * (CWorkspace: need 2*M*M+3*M-1,
2774 * prefer 2*M*M+2*M+(M-1)*NB)
2777 CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2778 $ WORK( ITAUP ), WORK( IWORK ),
2779 $ LWORK-IWORK+1, IERR )
2781 * Generate left bidiagonalizing vectors in WORK(IR)
2782 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2785 CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2786 $ WORK( ITAUQ ), WORK( IWORK ),
2787 $ LWORK-IWORK+1, IERR )
2790 * Perform bidiagonal QR iteration, computing left
2791 * singular vectors of L in WORK(IR) and computing
2792 * right singular vectors of L in WORK(IU)
2793 * (CWorkspace: need 2*M*M)
2794 * (RWorkspace: need BDSPAC)
2796 CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2797 $ WORK( IU ), LDWRKU, WORK( IR ),
2798 $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
2801 * Multiply right singular vectors of L in WORK(IU) by
2802 * Q in A, storing result in VT
2803 * (CWorkspace: need M*M)
2806 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2807 $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2809 * Copy left singular vectors of L to A
2810 * (CWorkspace: need M*M)
2813 CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2818 * Insufficient workspace for a fast algorithm
2823 * Compute A=L*Q, copying result to VT
2824 * (CWorkspace: need 2*M, prefer M+M*NB)
2827 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2828 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2829 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2832 * (CWorkspace: need 2*M, prefer M+M*NB)
2835 CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2836 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2842 * Zero out above L in A
2844 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2847 * Bidiagonalize L in A
2848 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2849 * (RWorkspace: need M)
2851 CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
2852 $ WORK( ITAUQ ), WORK( ITAUP ),
2853 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2855 * Multiply right vectors bidiagonalizing L by Q in VT
2856 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2859 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2860 $ WORK( ITAUP ), VT, LDVT,
2861 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2863 * Generate left bidiagonalizing vectors of L in A
2864 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2867 CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2868 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2871 * Perform bidiagonal QR iteration, computing left
2872 * singular vectors of A in A and computing right
2873 * singular vectors of A in VT
2875 * (RWorkspace: need BDSPAC)
2877 CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2878 $ LDVT, A, LDA, CDUM, 1,
2879 $ RWORK( IRWORK ), INFO )
2883 ELSE IF( WNTUAS ) THEN
2885 * Path 6t(N much larger than M, JOBU='S' or 'A',
2887 * M right singular vectors to be computed in VT and
2888 * M left singular vectors to be computed in U
2890 IF( LWORK.GE.M*M+3*M ) THEN
2892 * Sufficient workspace for a fast algorithm
2895 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2897 * WORK(IU) is LDA by N
2902 * WORK(IU) is LDA by M
2906 ITAU = IU + LDWRKU*M
2910 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2913 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2914 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2916 * Copy L to WORK(IU), zeroing out above it
2918 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
2920 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2921 $ WORK( IU+LDWRKU ), LDWRKU )
2924 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2927 CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2928 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2934 * Bidiagonalize L in WORK(IU), copying result to U
2935 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2936 * (RWorkspace: need M)
2938 CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
2939 $ RWORK( IE ), WORK( ITAUQ ),
2940 $ WORK( ITAUP ), WORK( IWORK ),
2941 $ LWORK-IWORK+1, IERR )
2942 CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2945 * Generate right bidiagonalizing vectors in WORK(IU)
2946 * (CWorkspace: need M*M+3*M-1,
2947 * prefer M*M+2*M+(M-1)*NB)
2950 CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2951 $ WORK( ITAUP ), WORK( IWORK ),
2952 $ LWORK-IWORK+1, IERR )
2954 * Generate left bidiagonalizing vectors in U
2955 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2958 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2959 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2962 * Perform bidiagonal QR iteration, computing left
2963 * singular vectors of L in U and computing right
2964 * singular vectors of L in WORK(IU)
2965 * (CWorkspace: need M*M)
2966 * (RWorkspace: need BDSPAC)
2968 CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2969 $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
2970 $ RWORK( IRWORK ), INFO )
2972 * Multiply right singular vectors of L in WORK(IU) by
2973 * Q in A, storing result in VT
2974 * (CWorkspace: need M*M)
2977 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2978 $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2982 * Insufficient workspace for a fast algorithm
2987 * Compute A=L*Q, copying result to VT
2988 * (CWorkspace: need 2*M, prefer M+M*NB)
2991 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2992 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2993 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2996 * (CWorkspace: need 2*M, prefer M+M*NB)
2999 CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
3000 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3002 * Copy L to U, zeroing out above it
3004 CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
3005 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3012 * Bidiagonalize L in U
3013 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3014 * (RWorkspace: need M)
3016 CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
3017 $ WORK( ITAUQ ), WORK( ITAUP ),
3018 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3020 * Multiply right bidiagonalizing vectors in U by Q
3022 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3025 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3026 $ WORK( ITAUP ), VT, LDVT,
3027 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3029 * Generate left bidiagonalizing vectors in U
3030 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3033 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3034 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3037 * Perform bidiagonal QR iteration, computing left
3038 * singular vectors of A in U and computing right
3039 * singular vectors of A in VT
3041 * (RWorkspace: need BDSPAC)
3043 CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3044 $ LDVT, U, LDU, CDUM, 1,
3045 $ RWORK( IRWORK ), INFO )
3051 ELSE IF( WNTVA ) THEN
3055 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3056 * N right singular vectors to be computed in VT and
3057 * no left singular vectors to be computed
3059 IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3061 * Sufficient workspace for a fast algorithm
3064 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3066 * WORK(IR) is LDA by M
3071 * WORK(IR) is M by M
3075 ITAU = IR + LDWRKR*M
3078 * Compute A=L*Q, copying result to VT
3079 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3082 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3083 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3084 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3086 * Copy L to WORK(IR), zeroing out above it
3088 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
3090 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3091 $ WORK( IR+LDWRKR ), LDWRKR )
3094 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3097 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3098 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3104 * Bidiagonalize L in WORK(IR)
3105 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3106 * (RWorkspace: need M)
3108 CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
3109 $ RWORK( IE ), WORK( ITAUQ ),
3110 $ WORK( ITAUP ), WORK( IWORK ),
3111 $ LWORK-IWORK+1, IERR )
3113 * Generate right bidiagonalizing vectors in WORK(IR)
3114 * (CWorkspace: need M*M+3*M-1,
3115 * prefer M*M+2*M+(M-1)*NB)
3118 CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
3119 $ WORK( ITAUP ), WORK( IWORK ),
3120 $ LWORK-IWORK+1, IERR )
3123 * Perform bidiagonal QR iteration, computing right
3124 * singular vectors of L in WORK(IR)
3125 * (CWorkspace: need M*M)
3126 * (RWorkspace: need BDSPAC)
3128 CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
3129 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
3130 $ RWORK( IRWORK ), INFO )
3132 * Multiply right singular vectors of L in WORK(IR) by
3133 * Q in VT, storing result in A
3134 * (CWorkspace: need M*M)
3137 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
3138 $ LDWRKR, VT, LDVT, CZERO, A, LDA )
3140 * Copy right singular vectors of A from A to VT
3142 CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3146 * Insufficient workspace for a fast algorithm
3151 * Compute A=L*Q, copying result to VT
3152 * (CWorkspace: need 2*M, prefer M+M*NB)
3155 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3156 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3157 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3160 * (CWorkspace: need M+N, prefer M+N*NB)
3163 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3164 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3170 * Zero out above L in A
3172 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3175 * Bidiagonalize L in A
3176 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3177 * (RWorkspace: need M)
3179 CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
3180 $ WORK( ITAUQ ), WORK( ITAUP ),
3181 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3183 * Multiply right bidiagonalizing vectors in A by Q
3185 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3188 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3189 $ WORK( ITAUP ), VT, LDVT,
3190 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3193 * Perform bidiagonal QR iteration, computing right
3194 * singular vectors of A in VT
3196 * (RWorkspace: need BDSPAC)
3198 CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
3199 $ LDVT, CDUM, 1, CDUM, 1,
3200 $ RWORK( IRWORK ), INFO )
3204 ELSE IF( WNTUO ) THEN
3206 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3207 * N right singular vectors to be computed in VT and
3208 * M left singular vectors to be overwritten on A
3210 IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
3212 * Sufficient workspace for a fast algorithm
3215 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3217 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3222 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3224 * WORK(IU) is LDA by M and WORK(IR) is M by M
3231 * WORK(IU) is M by M and WORK(IR) is M by M
3237 ITAU = IR + LDWRKR*M
3240 * Compute A=L*Q, copying result to VT
3241 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3244 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3245 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3246 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3249 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3252 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3253 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3255 * Copy L to WORK(IU), zeroing out above it
3257 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
3259 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3260 $ WORK( IU+LDWRKU ), LDWRKU )
3266 * Bidiagonalize L in WORK(IU), copying result to
3268 * (CWorkspace: need 2*M*M+3*M,
3269 * prefer 2*M*M+2*M+2*M*NB)
3270 * (RWorkspace: need M)
3272 CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
3273 $ RWORK( IE ), WORK( ITAUQ ),
3274 $ WORK( ITAUP ), WORK( IWORK ),
3275 $ LWORK-IWORK+1, IERR )
3276 CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3277 $ WORK( IR ), LDWRKR )
3279 * Generate right bidiagonalizing vectors in WORK(IU)
3280 * (CWorkspace: need 2*M*M+3*M-1,
3281 * prefer 2*M*M+2*M+(M-1)*NB)
3284 CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3285 $ WORK( ITAUP ), WORK( IWORK ),
3286 $ LWORK-IWORK+1, IERR )
3288 * Generate left bidiagonalizing vectors in WORK(IR)
3289 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3292 CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3293 $ WORK( ITAUQ ), WORK( IWORK ),
3294 $ LWORK-IWORK+1, IERR )
3297 * Perform bidiagonal QR iteration, computing left
3298 * singular vectors of L in WORK(IR) and computing
3299 * right singular vectors of L in WORK(IU)
3300 * (CWorkspace: need 2*M*M)
3301 * (RWorkspace: need BDSPAC)
3303 CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3304 $ WORK( IU ), LDWRKU, WORK( IR ),
3305 $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
3308 * Multiply right singular vectors of L in WORK(IU) by
3309 * Q in VT, storing result in A
3310 * (CWorkspace: need M*M)
3313 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3314 $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3316 * Copy right singular vectors of A from A to VT
3318 CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3320 * Copy left singular vectors of A from WORK(IR) to A
3322 CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3327 * Insufficient workspace for a fast algorithm
3332 * Compute A=L*Q, copying result to VT
3333 * (CWorkspace: need 2*M, prefer M+M*NB)
3336 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3337 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3338 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3341 * (CWorkspace: need M+N, prefer M+N*NB)
3344 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3345 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3351 * Zero out above L in A
3353 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3356 * Bidiagonalize L in A
3357 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3358 * (RWorkspace: need M)
3360 CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
3361 $ WORK( ITAUQ ), WORK( ITAUP ),
3362 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3364 * Multiply right bidiagonalizing vectors in A by Q
3366 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3369 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3370 $ WORK( ITAUP ), VT, LDVT,
3371 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3373 * Generate left bidiagonalizing vectors in A
3374 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3377 CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3378 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3381 * Perform bidiagonal QR iteration, computing left
3382 * singular vectors of A in A and computing right
3383 * singular vectors of A in VT
3385 * (RWorkspace: need BDSPAC)
3387 CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3388 $ LDVT, A, LDA, CDUM, 1,
3389 $ RWORK( IRWORK ), INFO )
3393 ELSE IF( WNTUAS ) THEN
3395 * Path 9t(N much larger than M, JOBU='S' or 'A',
3397 * N right singular vectors to be computed in VT and
3398 * M left singular vectors to be computed in U
3400 IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3402 * Sufficient workspace for a fast algorithm
3405 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3407 * WORK(IU) is LDA by M
3412 * WORK(IU) is M by M
3416 ITAU = IU + LDWRKU*M
3419 * Compute A=L*Q, copying result to VT
3420 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3423 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3424 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3425 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3428 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3431 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3432 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3434 * Copy L to WORK(IU), zeroing out above it
3436 CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
3438 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3439 $ WORK( IU+LDWRKU ), LDWRKU )
3445 * Bidiagonalize L in WORK(IU), copying result to U
3446 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3447 * (RWorkspace: need M)
3449 CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
3450 $ RWORK( IE ), WORK( ITAUQ ),
3451 $ WORK( ITAUP ), WORK( IWORK ),
3452 $ LWORK-IWORK+1, IERR )
3453 CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3456 * Generate right bidiagonalizing vectors in WORK(IU)
3457 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3460 CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3461 $ WORK( ITAUP ), WORK( IWORK ),
3462 $ LWORK-IWORK+1, IERR )
3464 * Generate left bidiagonalizing vectors in U
3465 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3468 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3469 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3472 * Perform bidiagonal QR iteration, computing left
3473 * singular vectors of L in U and computing right
3474 * singular vectors of L in WORK(IU)
3475 * (CWorkspace: need M*M)
3476 * (RWorkspace: need BDSPAC)
3478 CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3479 $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
3480 $ RWORK( IRWORK ), INFO )
3482 * Multiply right singular vectors of L in WORK(IU) by
3483 * Q in VT, storing result in A
3484 * (CWorkspace: need M*M)
3487 CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3488 $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3490 * Copy right singular vectors of A from A to VT
3492 CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3496 * Insufficient workspace for a fast algorithm
3501 * Compute A=L*Q, copying result to VT
3502 * (CWorkspace: need 2*M, prefer M+M*NB)
3505 CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3506 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3507 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3510 * (CWorkspace: need M+N, prefer M+N*NB)
3513 CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3514 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3516 * Copy L to U, zeroing out above it
3518 CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
3519 CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3526 * Bidiagonalize L in U
3527 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3528 * (RWorkspace: need M)
3530 CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
3531 $ WORK( ITAUQ ), WORK( ITAUP ),
3532 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3534 * Multiply right bidiagonalizing vectors in U by Q
3536 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3539 CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3540 $ WORK( ITAUP ), VT, LDVT,
3541 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3543 * Generate left bidiagonalizing vectors in U
3544 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3547 CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3548 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3551 * Perform bidiagonal QR iteration, computing left
3552 * singular vectors of A in U and computing right
3553 * singular vectors of A in VT
3555 * (RWorkspace: need BDSPAC)
3557 CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3558 $ LDVT, U, LDU, CDUM, 1,
3559 $ RWORK( IRWORK ), INFO )
3571 * Path 10t(N greater than M, but not much larger)
3572 * Reduce to bidiagonal form without LQ decomposition
3580 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3583 CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
3584 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3588 * If left singular vectors desired in U, copy result to U
3589 * and generate left bidiagonalizing vectors in U
3590 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3593 CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
3594 CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3595 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3599 * If right singular vectors desired in VT, copy result to
3600 * VT and generate right bidiagonalizing vectors in VT
3601 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3604 CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3609 CALL ZUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3610 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3614 * If left singular vectors desired in A, generate left
3615 * bidiagonalizing vectors in A
3616 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3619 CALL ZUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3620 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3624 * If right singular vectors desired in A, generate right
3625 * bidiagonalizing vectors in A
3626 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3629 CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3630 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3633 IF( WNTUAS .OR. WNTUO )
3637 IF( WNTVAS .OR. WNTVO )
3641 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3643 * Perform bidiagonal QR iteration, if desired, computing
3644 * left singular vectors in U and computing right singular
3647 * (RWorkspace: need BDSPAC)
3649 CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3650 $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
3652 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3654 * Perform bidiagonal QR iteration, if desired, computing
3655 * left singular vectors in U and computing right singular
3658 * (RWorkspace: need BDSPAC)
3660 CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
3661 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
3665 * Perform bidiagonal QR iteration, if desired, computing
3666 * left singular vectors in A and computing right singular
3669 * (RWorkspace: need BDSPAC)
3671 CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3672 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
3680 * Undo scaling if necessary
3682 IF( ISCL.EQ.1 ) THEN
3683 IF( ANRM.GT.BIGNUM )
3684 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3686 IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3687 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
3688 $ RWORK( IE ), MINMN, IERR )
3689 IF( ANRM.LT.SMLNUM )
3690 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3692 IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3693 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
3694 $ RWORK( IE ), MINMN, IERR )
3697 * Return optimal workspace in WORK(1)