1 *> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download CGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">
21 * SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
28 * .. Array Arguments ..
29 * REAL RWORK( * ), S( * )
30 * COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
40 *> CGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
44 *> A = U * SIGMA * conjugate-transpose(V)
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
53 *> Note that the routine returns V**H, not V.
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
85 *> JOBVT and JOBU cannot both be 'O'.
91 *> The number of rows of the input matrix A. M >= 0.
97 *> The number of columns of the input matrix A. N >= 0.
102 *> A is COMPLEX array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
118 *> The leading dimension of the array A. LDA >= max(1,M).
123 *> S is REAL array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
129 *> U is COMPLEX array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
146 *> VT is COMPLEX array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
163 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
182 *> RWORK is REAL array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if CBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
211 *> \ingroup complexGEsing
213 * =====================================================================
214 SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
215 $ WORK, LWORK, RWORK, INFO )
217 * -- LAPACK driver routine (version 3.6.1) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
222 * .. Scalar Arguments ..
223 CHARACTER JOBU, JOBVT
224 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
226 * .. Array Arguments ..
227 REAL RWORK( * ), S( * )
228 COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
232 * =====================================================================
236 PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ),
237 $ CONE = ( 1.0E0, 0.0E0 ) )
239 PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 )
241 * .. Local Scalars ..
242 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
244 INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
246 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
248 INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
249 $ LWORK_CGEBRD, LWORK_CUNGBR_P, LWORK_CUNGBR_Q,
250 $ LWORK_CGELQF, LWORK_CUNGLQ_N, LWORK_CUNGLQ_M
251 REAL ANRM, BIGNUM, EPS, SMLNUM
257 * .. External Subroutines ..
258 EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY,
259 $ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR,
262 * .. External Functions ..
266 EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH
268 * .. Intrinsic Functions ..
269 INTRINSIC MAX, MIN, SQRT
271 * .. Executable Statements ..
273 * Test the input arguments
277 WNTUA = LSAME( JOBU, 'A' )
278 WNTUS = LSAME( JOBU, 'S' )
279 WNTUAS = WNTUA .OR. WNTUS
280 WNTUO = LSAME( JOBU, 'O' )
281 WNTUN = LSAME( JOBU, 'N' )
282 WNTVA = LSAME( JOBVT, 'A' )
283 WNTVS = LSAME( JOBVT, 'S' )
284 WNTVAS = WNTVA .OR. WNTVS
285 WNTVO = LSAME( JOBVT, 'O' )
286 WNTVN = LSAME( JOBVT, 'N' )
287 LQUERY = ( LWORK.EQ.-1 )
289 IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
291 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
292 $ ( WNTVO .AND. WNTUO ) ) THEN
294 ELSE IF( M.LT.0 ) THEN
296 ELSE IF( N.LT.0 ) THEN
298 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
300 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
302 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
303 $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
318 IF( M.GE.N .AND. MINMN.GT.0 ) THEN
320 * Space needed for ZBDSQR is BDSPAC = 5*N
322 MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
323 * Compute space needed for CGEQRF
324 CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
325 LWORK_CGEQRF = INT( CDUM(1) )
326 * Compute space needed for CUNGQR
327 CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
328 LWORK_CUNGQR_N = INT( CDUM(1) )
329 CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
330 LWORK_CUNGQR_M = INT( CDUM(1) )
331 * Compute space needed for CGEBRD
332 CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
333 $ CDUM(1), CDUM(1), -1, IERR )
334 LWORK_CGEBRD = INT( CDUM(1) )
335 * Compute space needed for CUNGBR
336 CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
337 $ CDUM(1), -1, IERR )
338 LWORK_CUNGBR_P = INT( CDUM(1) )
339 CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
340 $ CDUM(1), -1, IERR )
341 LWORK_CUNGBR_Q = INT( CDUM(1) )
343 MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
344 IF( M.GE.MNTHR ) THEN
347 * Path 1 (M much larger than N, JOBU='N')
349 MAXWRK = N + LWORK_CGEQRF
350 MAXWRK = MAX( MAXWRK, 2*N+LWORK_CGEBRD )
351 IF( WNTVO .OR. WNTVAS )
352 $ MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_P )
354 ELSE IF( WNTUO .AND. WNTVN ) THEN
356 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
358 WRKBL = N + LWORK_CGEQRF
359 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N )
360 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
361 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
362 MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
364 ELSE IF( WNTUO .AND. WNTVAS ) THEN
366 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
369 WRKBL = N + LWORK_CGEQRF
370 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N )
371 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
372 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
373 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P )
374 MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
376 ELSE IF( WNTUS .AND. WNTVN ) THEN
378 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
380 WRKBL = N + LWORK_CGEQRF
381 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N )
382 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
383 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
386 ELSE IF( WNTUS .AND. WNTVO ) THEN
388 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
390 WRKBL = N + LWORK_CGEQRF
391 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N )
392 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
393 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
394 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P )
395 MAXWRK = 2*N*N + WRKBL
397 ELSE IF( WNTUS .AND. WNTVAS ) THEN
399 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
402 WRKBL = N + LWORK_CGEQRF
403 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N )
404 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
405 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
406 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P )
409 ELSE IF( WNTUA .AND. WNTVN ) THEN
411 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
413 WRKBL = N + LWORK_CGEQRF
414 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M )
415 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
416 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
419 ELSE IF( WNTUA .AND. WNTVO ) THEN
421 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
423 WRKBL = N + LWORK_CGEQRF
424 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M )
425 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
426 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
427 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P )
428 MAXWRK = 2*N*N + WRKBL
430 ELSE IF( WNTUA .AND. WNTVAS ) THEN
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
435 WRKBL = N + LWORK_CGEQRF
436 WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M )
437 WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD )
438 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q )
439 WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P )
445 * Path 10 (M at least N, but not much larger)
447 CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
448 $ CDUM(1), CDUM(1), -1, IERR )
449 LWORK_CGEBRD = INT( CDUM(1) )
450 MAXWRK = 2*N + LWORK_CGEBRD
451 IF( WNTUS .OR. WNTUO ) THEN
452 CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
453 $ CDUM(1), -1, IERR )
454 LWORK_CUNGBR_Q = INT( CDUM(1) )
455 MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
458 CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
459 $ CDUM(1), -1, IERR )
460 LWORK_CUNGBR_Q = INT( CDUM(1) )
461 MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
463 IF( .NOT.WNTVN ) THEN
464 MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_P )
468 ELSE IF( MINMN.GT.0 ) THEN
470 * Space needed for CBDSQR is BDSPAC = 5*M
472 MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
473 * Compute space needed for CGELQF
474 CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
475 LWORK_CGELQF = INT( CDUM(1) )
476 * Compute space needed for CUNGLQ
477 CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
479 LWORK_CUNGLQ_N = INT( CDUM(1) )
480 CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
481 LWORK_CUNGLQ_M = INT( CDUM(1) )
482 * Compute space needed for CGEBRD
483 CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
484 $ CDUM(1), CDUM(1), -1, IERR )
485 LWORK_CGEBRD = INT( CDUM(1) )
486 * Compute space needed for CUNGBR P
487 CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1),
488 $ CDUM(1), -1, IERR )
489 LWORK_CUNGBR_P = INT( CDUM(1) )
490 * Compute space needed for CUNGBR Q
491 CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1),
492 $ CDUM(1), -1, IERR )
493 LWORK_CUNGBR_Q = INT( CDUM(1) )
494 IF( N.GE.MNTHR ) THEN
497 * Path 1t(N much larger than M, JOBVT='N')
499 MAXWRK = M + LWORK_CGELQF
500 MAXWRK = MAX( MAXWRK, 2*M+LWORK_CGEBRD )
501 IF( WNTUO .OR. WNTUAS )
502 $ MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_Q )
504 ELSE IF( WNTVO .AND. WNTUN ) THEN
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
508 WRKBL = M + LWORK_CGELQF
509 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M )
510 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
511 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
512 MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
514 ELSE IF( WNTVO .AND. WNTUAS ) THEN
516 * Path 3t(N much larger than M, JOBU='S' or 'A',
519 WRKBL = M + LWORK_CGELQF
520 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M )
521 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
522 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
523 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q )
524 MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
526 ELSE IF( WNTVS .AND. WNTUN ) THEN
528 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
530 WRKBL = M + LWORK_CGELQF
531 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M )
532 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
533 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
536 ELSE IF( WNTVS .AND. WNTUO ) THEN
538 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
540 WRKBL = M + LWORK_CGELQF
541 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M )
542 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
543 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
544 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q )
545 MAXWRK = 2*M*M + WRKBL
547 ELSE IF( WNTVS .AND. WNTUAS ) THEN
549 * Path 6t(N much larger than M, JOBU='S' or 'A',
552 WRKBL = M + LWORK_CGELQF
553 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M )
554 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
555 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
556 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q )
559 ELSE IF( WNTVA .AND. WNTUN ) THEN
561 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
563 WRKBL = M + LWORK_CGELQF
564 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N )
565 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
566 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
569 ELSE IF( WNTVA .AND. WNTUO ) THEN
571 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
573 WRKBL = M + LWORK_CGELQF
574 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N )
575 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
576 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
577 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q )
578 MAXWRK = 2*M*M + WRKBL
580 ELSE IF( WNTVA .AND. WNTUAS ) THEN
582 * Path 9t(N much larger than M, JOBU='S' or 'A',
585 WRKBL = M + LWORK_CGELQF
586 WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N )
587 WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD )
588 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P )
589 WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q )
595 * Path 10t(N greater than M, but not much larger)
597 CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
598 $ CDUM(1), CDUM(1), -1, IERR )
599 LWORK_CGEBRD = INT( CDUM(1) )
600 MAXWRK = 2*M + LWORK_CGEBRD
601 IF( WNTVS .OR. WNTVO ) THEN
602 * Compute space needed for CUNGBR P
603 CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1),
604 $ CDUM(1), -1, IERR )
605 LWORK_CUNGBR_P = INT( CDUM(1) )
606 MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
609 CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1),
610 $ CDUM(1), -1, IERR )
611 LWORK_CUNGBR_P = INT( CDUM(1) )
612 MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
614 IF( .NOT.WNTUN ) THEN
615 MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_Q )
620 MAXWRK = MAX( MINWRK, MAXWRK )
623 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
629 CALL XERBLA( 'CGESVD', -INFO )
631 ELSE IF( LQUERY ) THEN
635 * Quick return if possible
637 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
641 * Get machine constants
644 SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
645 BIGNUM = ONE / SMLNUM
647 * Scale A if max element outside range [SMLNUM,BIGNUM]
649 ANRM = CLANGE( 'M', M, N, A, LDA, DUM )
651 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
653 CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
654 ELSE IF( ANRM.GT.BIGNUM ) THEN
656 CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
661 * A has at least as many rows as columns. If A has sufficiently
662 * more rows than columns, first reduce using the QR
663 * decomposition (if sufficient workspace available)
665 IF( M.GE.MNTHR ) THEN
669 * Path 1 (M much larger than N, JOBU='N')
670 * No left singular vectors to be computed
676 * (CWorkspace: need 2*N, prefer N+N*NB)
677 * (RWorkspace: need 0)
679 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
680 $ LWORK-IWORK+1, IERR )
685 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
693 * Bidiagonalize R in A
694 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
695 * (RWorkspace: need N)
697 CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
698 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
701 IF( WNTVO .OR. WNTVAS ) THEN
703 * If right singular vectors desired, generate P'.
704 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
707 CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
708 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
713 * Perform bidiagonal QR iteration, computing right
714 * singular vectors of A in A if desired
716 * (RWorkspace: need BDSPAC)
718 CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
719 $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
721 * If right singular vectors desired in VT, copy them there
724 $ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
726 ELSE IF( WNTUO .AND. WNTVN ) THEN
728 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
729 * N left singular vectors to be overwritten on A and
730 * no right singular vectors to be computed
732 IF( LWORK.GE.N*N+3*N ) THEN
734 * Sufficient workspace for a fast algorithm
737 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
739 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
743 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
745 * WORK(IU) is LDA by N, WORK(IR) is N by N
751 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
753 LDWRKU = ( LWORK-N*N ) / N
760 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
763 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
764 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
766 * Copy R to WORK(IR) and zero out below it
768 CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
769 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
770 $ WORK( IR+1 ), LDWRKR )
773 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
776 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
777 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
783 * Bidiagonalize R in WORK(IR)
784 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
785 * (RWorkspace: need N)
787 CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
788 $ WORK( ITAUQ ), WORK( ITAUP ),
789 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
791 * Generate left vectors bidiagonalizing R
792 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
793 * (RWorkspace: need 0)
795 CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
796 $ WORK( ITAUQ ), WORK( IWORK ),
797 $ LWORK-IWORK+1, IERR )
800 * Perform bidiagonal QR iteration, computing left
801 * singular vectors of R in WORK(IR)
802 * (CWorkspace: need N*N)
803 * (RWorkspace: need BDSPAC)
805 CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
806 $ WORK( IR ), LDWRKR, CDUM, 1,
807 $ RWORK( IRWORK ), INFO )
810 * Multiply Q in A by left singular vectors of R in
811 * WORK(IR), storing result in WORK(IU) and copying to A
812 * (CWorkspace: need N*N+N, prefer N*N+M*N)
815 DO 10 I = 1, M, LDWRKU
816 CHUNK = MIN( M-I+1, LDWRKU )
817 CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
818 $ LDA, WORK( IR ), LDWRKR, CZERO,
819 $ WORK( IU ), LDWRKU )
820 CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
826 * Insufficient workspace for a fast algorithm
834 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
837 CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
838 $ WORK( ITAUQ ), WORK( ITAUP ),
839 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
841 * Generate left vectors bidiagonalizing A
842 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
845 CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
846 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
849 * Perform bidiagonal QR iteration, computing left
850 * singular vectors of A in A
851 * (CWorkspace: need 0)
852 * (RWorkspace: need BDSPAC)
854 CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
855 $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
859 ELSE IF( WNTUO .AND. WNTVAS ) THEN
861 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
862 * N left singular vectors to be overwritten on A and
863 * N right singular vectors to be computed in VT
865 IF( LWORK.GE.N*N+3*N ) THEN
867 * Sufficient workspace for a fast algorithm
870 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
872 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
876 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
878 * WORK(IU) is LDA by N and WORK(IR) is N by N
884 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
886 LDWRKU = ( LWORK-N*N ) / N
893 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
896 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
897 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
899 * Copy R to VT, zeroing out below it
901 CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
903 $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
907 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
910 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
911 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
917 * Bidiagonalize R in VT, copying result to WORK(IR)
918 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
919 * (RWorkspace: need N)
921 CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
922 $ WORK( ITAUQ ), WORK( ITAUP ),
923 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
924 CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
926 * Generate left vectors bidiagonalizing R in WORK(IR)
927 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
930 CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
931 $ WORK( ITAUQ ), WORK( IWORK ),
932 $ LWORK-IWORK+1, IERR )
934 * Generate right vectors bidiagonalizing R in VT
935 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
938 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
939 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
942 * Perform bidiagonal QR iteration, computing left
943 * singular vectors of R in WORK(IR) and computing right
944 * singular vectors of R in VT
945 * (CWorkspace: need N*N)
946 * (RWorkspace: need BDSPAC)
948 CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
949 $ LDVT, WORK( IR ), LDWRKR, CDUM, 1,
950 $ RWORK( IRWORK ), INFO )
953 * Multiply Q in A by left singular vectors of R in
954 * WORK(IR), storing result in WORK(IU) and copying to A
955 * (CWorkspace: need N*N+N, prefer N*N+M*N)
958 DO 20 I = 1, M, LDWRKU
959 CHUNK = MIN( M-I+1, LDWRKU )
960 CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
961 $ LDA, WORK( IR ), LDWRKR, CZERO,
962 $ WORK( IU ), LDWRKU )
963 CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
969 * Insufficient workspace for a fast algorithm
975 * (CWorkspace: need 2*N, prefer N+N*NB)
978 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
979 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
981 * Copy R to VT, zeroing out below it
983 CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
985 $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
989 * (CWorkspace: need 2*N, prefer N+N*NB)
992 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
993 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
999 * Bidiagonalize R in VT
1000 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1003 CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1004 $ WORK( ITAUQ ), WORK( ITAUP ),
1005 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1007 * Multiply Q in A by left vectors bidiagonalizing R
1008 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1011 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1012 $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
1013 $ LWORK-IWORK+1, IERR )
1015 * Generate right vectors bidiagonalizing R in VT
1016 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1019 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1020 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1023 * Perform bidiagonal QR iteration, computing left
1024 * singular vectors of A in A and computing right
1025 * singular vectors of A in VT
1027 * (RWorkspace: need BDSPAC)
1029 CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1030 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
1035 ELSE IF( WNTUS ) THEN
1039 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1040 * N left singular vectors to be computed in U and
1041 * no right singular vectors to be computed
1043 IF( LWORK.GE.N*N+3*N ) THEN
1045 * Sufficient workspace for a fast algorithm
1048 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1050 * WORK(IR) is LDA by N
1055 * WORK(IR) is N by N
1059 ITAU = IR + LDWRKR*N
1063 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1066 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1067 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1069 * Copy R to WORK(IR), zeroing out below it
1071 CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
1073 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1074 $ WORK( IR+1 ), LDWRKR )
1077 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1080 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1081 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1087 * Bidiagonalize R in WORK(IR)
1088 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1089 * (RWorkspace: need N)
1091 CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1092 $ RWORK( IE ), WORK( ITAUQ ),
1093 $ WORK( ITAUP ), WORK( IWORK ),
1094 $ LWORK-IWORK+1, IERR )
1096 * Generate left vectors bidiagonalizing R in WORK(IR)
1097 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1100 CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1101 $ WORK( ITAUQ ), WORK( IWORK ),
1102 $ LWORK-IWORK+1, IERR )
1105 * Perform bidiagonal QR iteration, computing left
1106 * singular vectors of R in WORK(IR)
1107 * (CWorkspace: need N*N)
1108 * (RWorkspace: need BDSPAC)
1110 CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1111 $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1112 $ RWORK( IRWORK ), INFO )
1114 * Multiply Q in A by left singular vectors of R in
1115 * WORK(IR), storing result in U
1116 * (CWorkspace: need N*N)
1119 CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1120 $ WORK( IR ), LDWRKR, CZERO, U, LDU )
1124 * Insufficient workspace for a fast algorithm
1129 * Compute A=Q*R, copying result to U
1130 * (CWorkspace: need 2*N, prefer N+N*NB)
1133 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1134 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1135 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1138 * (CWorkspace: need 2*N, prefer N+N*NB)
1141 CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1142 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1148 * Zero out below R in A
1151 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1155 * Bidiagonalize R in A
1156 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1157 * (RWorkspace: need N)
1159 CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1160 $ WORK( ITAUQ ), WORK( ITAUP ),
1161 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1163 * Multiply Q in U by left vectors bidiagonalizing R
1164 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1167 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1168 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1169 $ LWORK-IWORK+1, IERR )
1172 * Perform bidiagonal QR iteration, computing left
1173 * singular vectors of A in U
1175 * (RWorkspace: need BDSPAC)
1177 CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1178 $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1183 ELSE IF( WNTVO ) THEN
1185 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1186 * N left singular vectors to be computed in U and
1187 * N right singular vectors to be overwritten on A
1189 IF( LWORK.GE.2*N*N+3*N ) THEN
1191 * Sufficient workspace for a fast algorithm
1194 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1196 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1201 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1203 * WORK(IU) is LDA by N and WORK(IR) is N by N
1210 * WORK(IU) is N by N and WORK(IR) is N by N
1216 ITAU = IR + LDWRKR*N
1220 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1223 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1224 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1226 * Copy R to WORK(IU), zeroing out below it
1228 CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1230 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1231 $ WORK( IU+1 ), LDWRKU )
1234 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1237 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1238 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1244 * Bidiagonalize R in WORK(IU), copying result to
1246 * (CWorkspace: need 2*N*N+3*N,
1247 * prefer 2*N*N+2*N+2*N*NB)
1248 * (RWorkspace: need N)
1250 CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1251 $ RWORK( IE ), WORK( ITAUQ ),
1252 $ WORK( ITAUP ), WORK( IWORK ),
1253 $ LWORK-IWORK+1, IERR )
1254 CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1255 $ WORK( IR ), LDWRKR )
1257 * Generate left bidiagonalizing vectors in WORK(IU)
1258 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1261 CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1262 $ WORK( ITAUQ ), WORK( IWORK ),
1263 $ LWORK-IWORK+1, IERR )
1265 * Generate right bidiagonalizing vectors in WORK(IR)
1266 * (CWorkspace: need 2*N*N+3*N-1,
1267 * prefer 2*N*N+2*N+(N-1)*NB)
1270 CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1271 $ WORK( ITAUP ), WORK( IWORK ),
1272 $ LWORK-IWORK+1, IERR )
1275 * Perform bidiagonal QR iteration, computing left
1276 * singular vectors of R in WORK(IU) and computing
1277 * right singular vectors of R in WORK(IR)
1278 * (CWorkspace: need 2*N*N)
1279 * (RWorkspace: need BDSPAC)
1281 CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1282 $ WORK( IR ), LDWRKR, WORK( IU ),
1283 $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1286 * Multiply Q in A by left singular vectors of R in
1287 * WORK(IU), storing result in U
1288 * (CWorkspace: need N*N)
1291 CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1292 $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1294 * Copy right singular vectors of R to A
1295 * (CWorkspace: need N*N)
1298 CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1303 * Insufficient workspace for a fast algorithm
1308 * Compute A=Q*R, copying result to U
1309 * (CWorkspace: need 2*N, prefer N+N*NB)
1312 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1313 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1314 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1317 * (CWorkspace: need 2*N, prefer N+N*NB)
1320 CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1321 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1327 * Zero out below R in A
1330 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1334 * Bidiagonalize R in A
1335 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1336 * (RWorkspace: need N)
1338 CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1339 $ WORK( ITAUQ ), WORK( ITAUP ),
1340 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1342 * Multiply Q in U by left vectors bidiagonalizing R
1343 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1346 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1347 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1348 $ LWORK-IWORK+1, IERR )
1350 * Generate right vectors bidiagonalizing R in A
1351 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1354 CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1355 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1358 * Perform bidiagonal QR iteration, computing left
1359 * singular vectors of A in U and computing right
1360 * singular vectors of A in A
1362 * (RWorkspace: need BDSPAC)
1364 CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1365 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1370 ELSE IF( WNTVAS ) THEN
1372 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1374 * N left singular vectors to be computed in U and
1375 * N right singular vectors to be computed in VT
1377 IF( LWORK.GE.N*N+3*N ) THEN
1379 * Sufficient workspace for a fast algorithm
1382 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1384 * WORK(IU) is LDA by N
1389 * WORK(IU) is N by N
1393 ITAU = IU + LDWRKU*N
1397 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1400 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1401 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1403 * Copy R to WORK(IU), zeroing out below it
1405 CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1407 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1408 $ WORK( IU+1 ), LDWRKU )
1411 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1414 CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1415 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1421 * Bidiagonalize R in WORK(IU), copying result to VT
1422 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1423 * (RWorkspace: need N)
1425 CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1426 $ RWORK( IE ), WORK( ITAUQ ),
1427 $ WORK( ITAUP ), WORK( IWORK ),
1428 $ LWORK-IWORK+1, IERR )
1429 CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1432 * Generate left bidiagonalizing vectors in WORK(IU)
1433 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1436 CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1437 $ WORK( ITAUQ ), WORK( IWORK ),
1438 $ LWORK-IWORK+1, IERR )
1440 * Generate right bidiagonalizing vectors in VT
1441 * (CWorkspace: need N*N+3*N-1,
1442 * prefer N*N+2*N+(N-1)*NB)
1445 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1446 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1449 * Perform bidiagonal QR iteration, computing left
1450 * singular vectors of R in WORK(IU) and computing
1451 * right singular vectors of R in VT
1452 * (CWorkspace: need N*N)
1453 * (RWorkspace: need BDSPAC)
1455 CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1456 $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1457 $ RWORK( IRWORK ), INFO )
1459 * Multiply Q in A by left singular vectors of R in
1460 * WORK(IU), storing result in U
1461 * (CWorkspace: need N*N)
1464 CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1465 $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1469 * Insufficient workspace for a fast algorithm
1474 * Compute A=Q*R, copying result to U
1475 * (CWorkspace: need 2*N, prefer N+N*NB)
1478 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1479 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1480 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1483 * (CWorkspace: need 2*N, prefer N+N*NB)
1486 CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1487 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1489 * Copy R to VT, zeroing out below it
1491 CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
1493 $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1494 $ VT( 2, 1 ), LDVT )
1500 * Bidiagonalize R in VT
1501 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1502 * (RWorkspace: need N)
1504 CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1505 $ WORK( ITAUQ ), WORK( ITAUP ),
1506 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1508 * Multiply Q in U by left bidiagonalizing vectors
1510 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1513 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1514 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1515 $ LWORK-IWORK+1, IERR )
1517 * Generate right bidiagonalizing vectors in VT
1518 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1521 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1522 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1525 * Perform bidiagonal QR iteration, computing left
1526 * singular vectors of A in U and computing right
1527 * singular vectors of A in VT
1529 * (RWorkspace: need BDSPAC)
1531 CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1532 $ LDVT, U, LDU, CDUM, 1,
1533 $ RWORK( IRWORK ), INFO )
1539 ELSE IF( WNTUA ) THEN
1543 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1544 * M left singular vectors to be computed in U and
1545 * no right singular vectors to be computed
1547 IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1549 * Sufficient workspace for a fast algorithm
1552 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1554 * WORK(IR) is LDA by N
1559 * WORK(IR) is N by N
1563 ITAU = IR + LDWRKR*N
1566 * Compute A=Q*R, copying result to U
1567 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1570 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1571 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1572 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1574 * Copy R to WORK(IR), zeroing out below it
1576 CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
1578 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1579 $ WORK( IR+1 ), LDWRKR )
1582 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1585 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1586 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1592 * Bidiagonalize R in WORK(IR)
1593 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1594 * (RWorkspace: need N)
1596 CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1597 $ RWORK( IE ), WORK( ITAUQ ),
1598 $ WORK( ITAUP ), WORK( IWORK ),
1599 $ LWORK-IWORK+1, IERR )
1601 * Generate left bidiagonalizing vectors in WORK(IR)
1602 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1605 CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1606 $ WORK( ITAUQ ), WORK( IWORK ),
1607 $ LWORK-IWORK+1, IERR )
1610 * Perform bidiagonal QR iteration, computing left
1611 * singular vectors of R in WORK(IR)
1612 * (CWorkspace: need N*N)
1613 * (RWorkspace: need BDSPAC)
1615 CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1616 $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1617 $ RWORK( IRWORK ), INFO )
1619 * Multiply Q in U by left singular vectors of R in
1620 * WORK(IR), storing result in A
1621 * (CWorkspace: need N*N)
1624 CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1625 $ WORK( IR ), LDWRKR, CZERO, A, LDA )
1627 * Copy left singular vectors of A from A to U
1629 CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1633 * Insufficient workspace for a fast algorithm
1638 * Compute A=Q*R, copying result to U
1639 * (CWorkspace: need 2*N, prefer N+N*NB)
1642 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1643 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1644 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1647 * (CWorkspace: need N+M, prefer N+M*NB)
1650 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1651 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1657 * Zero out below R in A
1660 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1664 * Bidiagonalize R in A
1665 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1666 * (RWorkspace: need N)
1668 CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1669 $ WORK( ITAUQ ), WORK( ITAUP ),
1670 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1672 * Multiply Q in U by left bidiagonalizing vectors
1674 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1677 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1678 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1679 $ LWORK-IWORK+1, IERR )
1682 * Perform bidiagonal QR iteration, computing left
1683 * singular vectors of A in U
1685 * (RWorkspace: need BDSPAC)
1687 CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1688 $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1693 ELSE IF( WNTVO ) THEN
1695 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1696 * M left singular vectors to be computed in U and
1697 * N right singular vectors to be overwritten on A
1699 IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
1701 * Sufficient workspace for a fast algorithm
1704 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1706 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1711 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1713 * WORK(IU) is LDA by N and WORK(IR) is N by N
1720 * WORK(IU) is N by N and WORK(IR) is N by N
1726 ITAU = IR + LDWRKR*N
1729 * Compute A=Q*R, copying result to U
1730 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1733 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1734 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1735 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1738 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1741 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1742 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1744 * Copy R to WORK(IU), zeroing out below it
1746 CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1748 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1749 $ WORK( IU+1 ), LDWRKU )
1755 * Bidiagonalize R in WORK(IU), copying result to
1757 * (CWorkspace: need 2*N*N+3*N,
1758 * prefer 2*N*N+2*N+2*N*NB)
1759 * (RWorkspace: need N)
1761 CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1762 $ RWORK( IE ), WORK( ITAUQ ),
1763 $ WORK( ITAUP ), WORK( IWORK ),
1764 $ LWORK-IWORK+1, IERR )
1765 CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1766 $ WORK( IR ), LDWRKR )
1768 * Generate left bidiagonalizing vectors in WORK(IU)
1769 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1772 CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1773 $ WORK( ITAUQ ), WORK( IWORK ),
1774 $ LWORK-IWORK+1, IERR )
1776 * Generate right bidiagonalizing vectors in WORK(IR)
1777 * (CWorkspace: need 2*N*N+3*N-1,
1778 * prefer 2*N*N+2*N+(N-1)*NB)
1781 CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1782 $ WORK( ITAUP ), WORK( IWORK ),
1783 $ LWORK-IWORK+1, IERR )
1786 * Perform bidiagonal QR iteration, computing left
1787 * singular vectors of R in WORK(IU) and computing
1788 * right singular vectors of R in WORK(IR)
1789 * (CWorkspace: need 2*N*N)
1790 * (RWorkspace: need BDSPAC)
1792 CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1793 $ WORK( IR ), LDWRKR, WORK( IU ),
1794 $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1797 * Multiply Q in U by left singular vectors of R in
1798 * WORK(IU), storing result in A
1799 * (CWorkspace: need N*N)
1802 CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1803 $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1805 * Copy left singular vectors of A from A to U
1807 CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1809 * Copy right singular vectors of R from WORK(IR) to A
1811 CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1816 * Insufficient workspace for a fast algorithm
1821 * Compute A=Q*R, copying result to U
1822 * (CWorkspace: need 2*N, prefer N+N*NB)
1825 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1826 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1827 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1830 * (CWorkspace: need N+M, prefer N+M*NB)
1833 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1834 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1840 * Zero out below R in A
1843 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1847 * Bidiagonalize R in A
1848 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1849 * (RWorkspace: need N)
1851 CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1852 $ WORK( ITAUQ ), WORK( ITAUP ),
1853 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1855 * Multiply Q in U by left bidiagonalizing vectors
1857 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1860 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1861 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1862 $ LWORK-IWORK+1, IERR )
1864 * Generate right bidiagonalizing vectors in A
1865 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1868 CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1869 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1872 * Perform bidiagonal QR iteration, computing left
1873 * singular vectors of A in U and computing right
1874 * singular vectors of A in A
1876 * (RWorkspace: need BDSPAC)
1878 CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1879 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1884 ELSE IF( WNTVAS ) THEN
1886 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1888 * M left singular vectors to be computed in U and
1889 * N right singular vectors to be computed in VT
1891 IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1893 * Sufficient workspace for a fast algorithm
1896 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1898 * WORK(IU) is LDA by N
1903 * WORK(IU) is N by N
1907 ITAU = IU + LDWRKU*N
1910 * Compute A=Q*R, copying result to U
1911 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1914 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1915 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1916 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1919 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1922 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1923 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1925 * Copy R to WORK(IU), zeroing out below it
1927 CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1929 CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1930 $ WORK( IU+1 ), LDWRKU )
1936 * Bidiagonalize R in WORK(IU), copying result to VT
1937 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1938 * (RWorkspace: need N)
1940 CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1941 $ RWORK( IE ), WORK( ITAUQ ),
1942 $ WORK( ITAUP ), WORK( IWORK ),
1943 $ LWORK-IWORK+1, IERR )
1944 CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1947 * Generate left bidiagonalizing vectors in WORK(IU)
1948 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1951 CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1952 $ WORK( ITAUQ ), WORK( IWORK ),
1953 $ LWORK-IWORK+1, IERR )
1955 * Generate right bidiagonalizing vectors in VT
1956 * (CWorkspace: need N*N+3*N-1,
1957 * prefer N*N+2*N+(N-1)*NB)
1958 * (RWorkspace: need 0)
1960 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1961 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1964 * Perform bidiagonal QR iteration, computing left
1965 * singular vectors of R in WORK(IU) and computing
1966 * right singular vectors of R in VT
1967 * (CWorkspace: need N*N)
1968 * (RWorkspace: need BDSPAC)
1970 CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1971 $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1972 $ RWORK( IRWORK ), INFO )
1974 * Multiply Q in U by left singular vectors of R in
1975 * WORK(IU), storing result in A
1976 * (CWorkspace: need N*N)
1979 CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1980 $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1982 * Copy left singular vectors of A from A to U
1984 CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1988 * Insufficient workspace for a fast algorithm
1993 * Compute A=Q*R, copying result to U
1994 * (CWorkspace: need 2*N, prefer N+N*NB)
1997 CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1998 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1999 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
2002 * (CWorkspace: need N+M, prefer N+M*NB)
2005 CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
2006 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2008 * Copy R from A to VT, zeroing out below it
2010 CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
2012 $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
2013 $ VT( 2, 1 ), LDVT )
2019 * Bidiagonalize R in VT
2020 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2021 * (RWorkspace: need N)
2023 CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
2024 $ WORK( ITAUQ ), WORK( ITAUP ),
2025 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2027 * Multiply Q in U by left bidiagonalizing vectors
2029 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2032 CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
2033 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
2034 $ LWORK-IWORK+1, IERR )
2036 * Generate right bidiagonalizing vectors in VT
2037 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2040 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2041 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2044 * Perform bidiagonal QR iteration, computing left
2045 * singular vectors of A in U and computing right
2046 * singular vectors of A in VT
2048 * (RWorkspace: need BDSPAC)
2050 CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
2051 $ LDVT, U, LDU, CDUM, 1,
2052 $ RWORK( IRWORK ), INFO )
2064 * Path 10 (M at least N, but not much larger)
2065 * Reduce to bidiagonal form without QR decomposition
2073 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2074 * (RWorkspace: need N)
2076 CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2077 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2081 * If left singular vectors desired in U, copy result to U
2082 * and generate left bidiagonalizing vectors in U
2083 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2086 CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
2091 CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
2092 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2096 * If right singular vectors desired in VT, copy result to
2097 * VT and generate right bidiagonalizing vectors in VT
2098 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2101 CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
2102 CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2103 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2107 * If left singular vectors desired in A, generate left
2108 * bidiagonalizing vectors in A
2109 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2112 CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2113 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2117 * If right singular vectors desired in A, generate right
2118 * bidiagonalizing vectors in A
2119 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2122 CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2123 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2126 IF( WNTUAS .OR. WNTUO )
2130 IF( WNTVAS .OR. WNTVO )
2134 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2136 * Perform bidiagonal QR iteration, if desired, computing
2137 * left singular vectors in U and computing right singular
2140 * (RWorkspace: need BDSPAC)
2142 CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2143 $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
2145 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2147 * Perform bidiagonal QR iteration, if desired, computing
2148 * left singular vectors in U and computing right singular
2151 * (RWorkspace: need BDSPAC)
2153 CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
2154 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
2158 * Perform bidiagonal QR iteration, if desired, computing
2159 * left singular vectors in A and computing right singular
2162 * (RWorkspace: need BDSPAC)
2164 CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2165 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
2173 * A has more columns than rows. If A has sufficiently more
2174 * columns than rows, first reduce using the LQ decomposition (if
2175 * sufficient workspace available)
2177 IF( N.GE.MNTHR ) THEN
2181 * Path 1t(N much larger than M, JOBVT='N')
2182 * No right singular vectors to be computed
2188 * (CWorkspace: need 2*M, prefer M+M*NB)
2191 CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2192 $ LWORK-IWORK+1, IERR )
2196 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
2203 * Bidiagonalize L in A
2204 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2205 * (RWorkspace: need M)
2207 CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2208 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2210 IF( WNTUO .OR. WNTUAS ) THEN
2212 * If left singular vectors desired, generate Q
2213 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2216 CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2217 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2221 IF( WNTUO .OR. WNTUAS )
2224 * Perform bidiagonal QR iteration, computing left singular
2225 * vectors of A in A if desired
2227 * (RWorkspace: need BDSPAC)
2229 CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
2230 $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
2232 * If left singular vectors desired in U, copy them there
2235 $ CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
2237 ELSE IF( WNTVO .AND. WNTUN ) THEN
2239 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2240 * M right singular vectors to be overwritten on A and
2241 * no left singular vectors to be computed
2243 IF( LWORK.GE.M*M+3*M ) THEN
2245 * Sufficient workspace for a fast algorithm
2248 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2250 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2255 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2257 * WORK(IU) is LDA by N and WORK(IR) is M by M
2264 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2267 CHUNK = ( LWORK-M*M ) / M
2270 ITAU = IR + LDWRKR*M
2274 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2277 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2278 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2280 * Copy L to WORK(IR) and zero out above it
2282 CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2283 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2284 $ WORK( IR+LDWRKR ), LDWRKR )
2287 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2290 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2291 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2297 * Bidiagonalize L in WORK(IR)
2298 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2299 * (RWorkspace: need M)
2301 CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
2302 $ WORK( ITAUQ ), WORK( ITAUP ),
2303 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2305 * Generate right vectors bidiagonalizing L
2306 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2309 CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2310 $ WORK( ITAUP ), WORK( IWORK ),
2311 $ LWORK-IWORK+1, IERR )
2314 * Perform bidiagonal QR iteration, computing right
2315 * singular vectors of L in WORK(IR)
2316 * (CWorkspace: need M*M)
2317 * (RWorkspace: need BDSPAC)
2319 CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2320 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2321 $ RWORK( IRWORK ), INFO )
2324 * Multiply right singular vectors of L in WORK(IR) by Q
2325 * in A, storing result in WORK(IU) and copying to A
2326 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2329 DO 30 I = 1, N, CHUNK
2330 BLK = MIN( N-I+1, CHUNK )
2331 CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2332 $ LDWRKR, A( 1, I ), LDA, CZERO,
2333 $ WORK( IU ), LDWRKU )
2334 CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2340 * Insufficient workspace for a fast algorithm
2348 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2349 * (RWorkspace: need M)
2351 CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
2352 $ WORK( ITAUQ ), WORK( ITAUP ),
2353 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2355 * Generate right vectors bidiagonalizing A
2356 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2359 CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2360 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2363 * Perform bidiagonal QR iteration, computing right
2364 * singular vectors of A in A
2366 * (RWorkspace: need BDSPAC)
2368 CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
2369 $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
2373 ELSE IF( WNTVO .AND. WNTUAS ) THEN
2375 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2376 * M right singular vectors to be overwritten on A and
2377 * M left singular vectors to be computed in U
2379 IF( LWORK.GE.M*M+3*M ) THEN
2381 * Sufficient workspace for a fast algorithm
2384 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2386 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2391 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2393 * WORK(IU) is LDA by N and WORK(IR) is M by M
2400 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2403 CHUNK = ( LWORK-M*M ) / M
2406 ITAU = IR + LDWRKR*M
2410 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2413 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2414 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2416 * Copy L to U, zeroing about above it
2418 CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2419 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2423 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2426 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2427 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2433 * Bidiagonalize L in U, copying result to WORK(IR)
2434 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2435 * (RWorkspace: need M)
2437 CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2438 $ WORK( ITAUQ ), WORK( ITAUP ),
2439 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2440 CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2442 * Generate right vectors bidiagonalizing L in WORK(IR)
2443 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2446 CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2447 $ WORK( ITAUP ), WORK( IWORK ),
2448 $ LWORK-IWORK+1, IERR )
2450 * Generate left vectors bidiagonalizing L in U
2451 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2454 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2455 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2458 * Perform bidiagonal QR iteration, computing left
2459 * singular vectors of L in U, and computing right
2460 * singular vectors of L in WORK(IR)
2461 * (CWorkspace: need M*M)
2462 * (RWorkspace: need BDSPAC)
2464 CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2465 $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
2466 $ RWORK( IRWORK ), INFO )
2469 * Multiply right singular vectors of L in WORK(IR) by Q
2470 * in A, storing result in WORK(IU) and copying to A
2471 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2474 DO 40 I = 1, N, CHUNK
2475 BLK = MIN( N-I+1, CHUNK )
2476 CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2477 $ LDWRKR, A( 1, I ), LDA, CZERO,
2478 $ WORK( IU ), LDWRKU )
2479 CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2485 * Insufficient workspace for a fast algorithm
2491 * (CWorkspace: need 2*M, prefer M+M*NB)
2494 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2495 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2497 * Copy L to U, zeroing out above it
2499 CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2500 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2504 * (CWorkspace: need 2*M, prefer M+M*NB)
2507 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2508 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2514 * Bidiagonalize L in U
2515 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2516 * (RWorkspace: need M)
2518 CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2519 $ WORK( ITAUQ ), WORK( ITAUP ),
2520 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2522 * Multiply right vectors bidiagonalizing L by Q in A
2523 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2526 CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2527 $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2528 $ LWORK-IWORK+1, IERR )
2530 * Generate left vectors bidiagonalizing L in U
2531 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2534 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2535 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2538 * Perform bidiagonal QR iteration, computing left
2539 * singular vectors of A in U and computing right
2540 * singular vectors of A in A
2542 * (RWorkspace: need BDSPAC)
2544 CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
2545 $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
2549 ELSE IF( WNTVS ) THEN
2553 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2554 * M right singular vectors to be computed in VT and
2555 * no left singular vectors to be computed
2557 IF( LWORK.GE.M*M+3*M ) THEN
2559 * Sufficient workspace for a fast algorithm
2562 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2564 * WORK(IR) is LDA by M
2569 * WORK(IR) is M by M
2573 ITAU = IR + LDWRKR*M
2577 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2580 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2581 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2583 * Copy L to WORK(IR), zeroing out above it
2585 CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
2587 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2588 $ WORK( IR+LDWRKR ), LDWRKR )
2591 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2594 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2595 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2601 * Bidiagonalize L in WORK(IR)
2602 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2603 * (RWorkspace: need M)
2605 CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
2606 $ RWORK( IE ), WORK( ITAUQ ),
2607 $ WORK( ITAUP ), WORK( IWORK ),
2608 $ LWORK-IWORK+1, IERR )
2610 * Generate right vectors bidiagonalizing L in
2612 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2615 CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2616 $ WORK( ITAUP ), WORK( IWORK ),
2617 $ LWORK-IWORK+1, IERR )
2620 * Perform bidiagonal QR iteration, computing right
2621 * singular vectors of L in WORK(IR)
2622 * (CWorkspace: need M*M)
2623 * (RWorkspace: need BDSPAC)
2625 CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2626 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2627 $ RWORK( IRWORK ), INFO )
2629 * Multiply right singular vectors of L in WORK(IR) by
2630 * Q in A, storing result in VT
2631 * (CWorkspace: need M*M)
2634 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
2635 $ LDWRKR, A, LDA, CZERO, VT, LDVT )
2639 * Insufficient workspace for a fast algorithm
2645 * (CWorkspace: need 2*M, prefer M+M*NB)
2648 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2649 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2653 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2656 * (CWorkspace: need 2*M, prefer M+M*NB)
2659 CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2660 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2666 * Zero out above L in A
2668 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2671 * Bidiagonalize L in A
2672 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2673 * (RWorkspace: need M)
2675 CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2676 $ WORK( ITAUQ ), WORK( ITAUP ),
2677 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2679 * Multiply right vectors bidiagonalizing L by Q in VT
2680 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2683 CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2684 $ WORK( ITAUP ), VT, LDVT,
2685 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2688 * Perform bidiagonal QR iteration, computing right
2689 * singular vectors of A in VT
2691 * (RWorkspace: need BDSPAC)
2693 CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
2694 $ LDVT, CDUM, 1, CDUM, 1,
2695 $ RWORK( IRWORK ), INFO )
2699 ELSE IF( WNTUO ) THEN
2701 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2702 * M right singular vectors to be computed in VT and
2703 * M left singular vectors to be overwritten on A
2705 IF( LWORK.GE.2*M*M+3*M ) THEN
2707 * Sufficient workspace for a fast algorithm
2710 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2712 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2717 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2719 * WORK(IU) is LDA by M and WORK(IR) is M by M
2726 * WORK(IU) is M by M and WORK(IR) is M by M
2732 ITAU = IR + LDWRKR*M
2736 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2739 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2740 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2742 * Copy L to WORK(IU), zeroing out below it
2744 CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2746 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2747 $ WORK( IU+LDWRKU ), LDWRKU )
2750 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2753 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2754 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2760 * Bidiagonalize L in WORK(IU), copying result to
2762 * (CWorkspace: need 2*M*M+3*M,
2763 * prefer 2*M*M+2*M+2*M*NB)
2764 * (RWorkspace: need M)
2766 CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2767 $ RWORK( IE ), WORK( ITAUQ ),
2768 $ WORK( ITAUP ), WORK( IWORK ),
2769 $ LWORK-IWORK+1, IERR )
2770 CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2771 $ WORK( IR ), LDWRKR )
2773 * Generate right bidiagonalizing vectors in WORK(IU)
2774 * (CWorkspace: need 2*M*M+3*M-1,
2775 * prefer 2*M*M+2*M+(M-1)*NB)
2778 CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2779 $ WORK( ITAUP ), WORK( IWORK ),
2780 $ LWORK-IWORK+1, IERR )
2782 * Generate left bidiagonalizing vectors in WORK(IR)
2783 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2786 CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2787 $ WORK( ITAUQ ), WORK( IWORK ),
2788 $ LWORK-IWORK+1, IERR )
2791 * Perform bidiagonal QR iteration, computing left
2792 * singular vectors of L in WORK(IR) and computing
2793 * right singular vectors of L in WORK(IU)
2794 * (CWorkspace: need 2*M*M)
2795 * (RWorkspace: need BDSPAC)
2797 CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2798 $ WORK( IU ), LDWRKU, WORK( IR ),
2799 $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
2802 * Multiply right singular vectors of L in WORK(IU) by
2803 * Q in A, storing result in VT
2804 * (CWorkspace: need M*M)
2807 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2808 $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2810 * Copy left singular vectors of L to A
2811 * (CWorkspace: need M*M)
2814 CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2819 * Insufficient workspace for a fast algorithm
2824 * Compute A=L*Q, copying result to VT
2825 * (CWorkspace: need 2*M, prefer M+M*NB)
2828 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2829 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2830 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2833 * (CWorkspace: need 2*M, prefer M+M*NB)
2836 CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2837 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2843 * Zero out above L in A
2845 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2848 * Bidiagonalize L in A
2849 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2850 * (RWorkspace: need M)
2852 CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2853 $ WORK( ITAUQ ), WORK( ITAUP ),
2854 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2856 * Multiply right vectors bidiagonalizing L by Q in VT
2857 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2860 CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2861 $ WORK( ITAUP ), VT, LDVT,
2862 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2864 * Generate left bidiagonalizing vectors of L in A
2865 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2868 CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2869 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2872 * Perform bidiagonal QR iteration, computing left
2873 * singular vectors of A in A and computing right
2874 * singular vectors of A in VT
2876 * (RWorkspace: need BDSPAC)
2878 CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2879 $ LDVT, A, LDA, CDUM, 1,
2880 $ RWORK( IRWORK ), INFO )
2884 ELSE IF( WNTUAS ) THEN
2886 * Path 6t(N much larger than M, JOBU='S' or 'A',
2888 * M right singular vectors to be computed in VT and
2889 * M left singular vectors to be computed in U
2891 IF( LWORK.GE.M*M+3*M ) THEN
2893 * Sufficient workspace for a fast algorithm
2896 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2898 * WORK(IU) is LDA by N
2903 * WORK(IU) is LDA by M
2907 ITAU = IU + LDWRKU*M
2911 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2914 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2915 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2917 * Copy L to WORK(IU), zeroing out above it
2919 CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2921 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2922 $ WORK( IU+LDWRKU ), LDWRKU )
2925 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2928 CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2929 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2935 * Bidiagonalize L in WORK(IU), copying result to U
2936 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2937 * (RWorkspace: need M)
2939 CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2940 $ RWORK( IE ), WORK( ITAUQ ),
2941 $ WORK( ITAUP ), WORK( IWORK ),
2942 $ LWORK-IWORK+1, IERR )
2943 CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2946 * Generate right bidiagonalizing vectors in WORK(IU)
2947 * (CWorkspace: need M*M+3*M-1,
2948 * prefer M*M+2*M+(M-1)*NB)
2951 CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2952 $ WORK( ITAUP ), WORK( IWORK ),
2953 $ LWORK-IWORK+1, IERR )
2955 * Generate left bidiagonalizing vectors in U
2956 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2959 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2960 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2963 * Perform bidiagonal QR iteration, computing left
2964 * singular vectors of L in U and computing right
2965 * singular vectors of L in WORK(IU)
2966 * (CWorkspace: need M*M)
2967 * (RWorkspace: need BDSPAC)
2969 CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2970 $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
2971 $ RWORK( IRWORK ), INFO )
2973 * Multiply right singular vectors of L in WORK(IU) by
2974 * Q in A, storing result in VT
2975 * (CWorkspace: need M*M)
2978 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2979 $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2983 * Insufficient workspace for a fast algorithm
2988 * Compute A=L*Q, copying result to VT
2989 * (CWorkspace: need 2*M, prefer M+M*NB)
2992 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2993 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2994 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2997 * (CWorkspace: need 2*M, prefer M+M*NB)
3000 CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
3001 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3003 * Copy L to U, zeroing out above it
3005 CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3006 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3013 * Bidiagonalize L in U
3014 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3015 * (RWorkspace: need M)
3017 CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
3018 $ WORK( ITAUQ ), WORK( ITAUP ),
3019 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3021 * Multiply right bidiagonalizing vectors in U by Q
3023 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3026 CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3027 $ WORK( ITAUP ), VT, LDVT,
3028 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3030 * Generate left bidiagonalizing vectors in U
3031 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3034 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3035 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3038 * Perform bidiagonal QR iteration, computing left
3039 * singular vectors of A in U and computing right
3040 * singular vectors of A in VT
3042 * (RWorkspace: need BDSPAC)
3044 CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3045 $ LDVT, U, LDU, CDUM, 1,
3046 $ RWORK( IRWORK ), INFO )
3052 ELSE IF( WNTVA ) THEN
3056 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3057 * N right singular vectors to be computed in VT and
3058 * no left singular vectors to be computed
3060 IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3062 * Sufficient workspace for a fast algorithm
3065 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3067 * WORK(IR) is LDA by M
3072 * WORK(IR) is M by M
3076 ITAU = IR + LDWRKR*M
3079 * Compute A=L*Q, copying result to VT
3080 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3083 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3084 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3085 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3087 * Copy L to WORK(IR), zeroing out above it
3089 CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
3091 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3092 $ WORK( IR+LDWRKR ), LDWRKR )
3095 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3098 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3099 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3105 * Bidiagonalize L in WORK(IR)
3106 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3107 * (RWorkspace: need M)
3109 CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
3110 $ RWORK( IE ), WORK( ITAUQ ),
3111 $ WORK( ITAUP ), WORK( IWORK ),
3112 $ LWORK-IWORK+1, IERR )
3114 * Generate right bidiagonalizing vectors in WORK(IR)
3115 * (CWorkspace: need M*M+3*M-1,
3116 * prefer M*M+2*M+(M-1)*NB)
3119 CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
3120 $ WORK( ITAUP ), WORK( IWORK ),
3121 $ LWORK-IWORK+1, IERR )
3124 * Perform bidiagonal QR iteration, computing right
3125 * singular vectors of L in WORK(IR)
3126 * (CWorkspace: need M*M)
3127 * (RWorkspace: need BDSPAC)
3129 CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
3130 $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
3131 $ RWORK( IRWORK ), INFO )
3133 * Multiply right singular vectors of L in WORK(IR) by
3134 * Q in VT, storing result in A
3135 * (CWorkspace: need M*M)
3138 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
3139 $ LDWRKR, VT, LDVT, CZERO, A, LDA )
3141 * Copy right singular vectors of A from A to VT
3143 CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3147 * Insufficient workspace for a fast algorithm
3152 * Compute A=L*Q, copying result to VT
3153 * (CWorkspace: need 2*M, prefer M+M*NB)
3156 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3157 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3158 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3161 * (CWorkspace: need M+N, prefer M+N*NB)
3164 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3165 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3171 * Zero out above L in A
3173 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3176 * Bidiagonalize L in A
3177 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3178 * (RWorkspace: need M)
3180 CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3181 $ WORK( ITAUQ ), WORK( ITAUP ),
3182 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3184 * Multiply right bidiagonalizing vectors in A by Q
3186 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3189 CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3190 $ WORK( ITAUP ), VT, LDVT,
3191 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3194 * Perform bidiagonal QR iteration, computing right
3195 * singular vectors of A in VT
3197 * (RWorkspace: need BDSPAC)
3199 CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
3200 $ LDVT, CDUM, 1, CDUM, 1,
3201 $ RWORK( IRWORK ), INFO )
3205 ELSE IF( WNTUO ) THEN
3207 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3208 * N right singular vectors to be computed in VT and
3209 * M left singular vectors to be overwritten on A
3211 IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
3213 * Sufficient workspace for a fast algorithm
3216 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3218 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3223 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3225 * WORK(IU) is LDA by M and WORK(IR) is M by M
3232 * WORK(IU) is M by M and WORK(IR) is M by M
3238 ITAU = IR + LDWRKR*M
3241 * Compute A=L*Q, copying result to VT
3242 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3245 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3246 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3247 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3250 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3253 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3254 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3256 * Copy L to WORK(IU), zeroing out above it
3258 CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3260 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3261 $ WORK( IU+LDWRKU ), LDWRKU )
3267 * Bidiagonalize L in WORK(IU), copying result to
3269 * (CWorkspace: need 2*M*M+3*M,
3270 * prefer 2*M*M+2*M+2*M*NB)
3271 * (RWorkspace: need M)
3273 CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3274 $ RWORK( IE ), WORK( ITAUQ ),
3275 $ WORK( ITAUP ), WORK( IWORK ),
3276 $ LWORK-IWORK+1, IERR )
3277 CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3278 $ WORK( IR ), LDWRKR )
3280 * Generate right bidiagonalizing vectors in WORK(IU)
3281 * (CWorkspace: need 2*M*M+3*M-1,
3282 * prefer 2*M*M+2*M+(M-1)*NB)
3285 CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3286 $ WORK( ITAUP ), WORK( IWORK ),
3287 $ LWORK-IWORK+1, IERR )
3289 * Generate left bidiagonalizing vectors in WORK(IR)
3290 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3293 CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3294 $ WORK( ITAUQ ), WORK( IWORK ),
3295 $ LWORK-IWORK+1, IERR )
3298 * Perform bidiagonal QR iteration, computing left
3299 * singular vectors of L in WORK(IR) and computing
3300 * right singular vectors of L in WORK(IU)
3301 * (CWorkspace: need 2*M*M)
3302 * (RWorkspace: need BDSPAC)
3304 CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3305 $ WORK( IU ), LDWRKU, WORK( IR ),
3306 $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
3309 * Multiply right singular vectors of L in WORK(IU) by
3310 * Q in VT, storing result in A
3311 * (CWorkspace: need M*M)
3314 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3315 $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3317 * Copy right singular vectors of A from A to VT
3319 CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3321 * Copy left singular vectors of A from WORK(IR) to A
3323 CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3328 * Insufficient workspace for a fast algorithm
3333 * Compute A=L*Q, copying result to VT
3334 * (CWorkspace: need 2*M, prefer M+M*NB)
3337 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3338 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3339 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3342 * (CWorkspace: need M+N, prefer M+N*NB)
3345 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3346 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3352 * Zero out above L in A
3354 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3357 * Bidiagonalize L in A
3358 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3359 * (RWorkspace: need M)
3361 CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3362 $ WORK( ITAUQ ), WORK( ITAUP ),
3363 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3365 * Multiply right bidiagonalizing vectors in A by Q
3367 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3370 CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3371 $ WORK( ITAUP ), VT, LDVT,
3372 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3374 * Generate left bidiagonalizing vectors in A
3375 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3378 CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3379 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3382 * Perform bidiagonal QR iteration, computing left
3383 * singular vectors of A in A and computing right
3384 * singular vectors of A in VT
3386 * (RWorkspace: need BDSPAC)
3388 CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3389 $ LDVT, A, LDA, CDUM, 1,
3390 $ RWORK( IRWORK ), INFO )
3394 ELSE IF( WNTUAS ) THEN
3396 * Path 9t(N much larger than M, JOBU='S' or 'A',
3398 * N right singular vectors to be computed in VT and
3399 * M left singular vectors to be computed in U
3401 IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3403 * Sufficient workspace for a fast algorithm
3406 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3408 * WORK(IU) is LDA by M
3413 * WORK(IU) is M by M
3417 ITAU = IU + LDWRKU*M
3420 * Compute A=L*Q, copying result to VT
3421 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3424 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3425 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3426 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3429 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3432 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3433 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3435 * Copy L to WORK(IU), zeroing out above it
3437 CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3439 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3440 $ WORK( IU+LDWRKU ), LDWRKU )
3446 * Bidiagonalize L in WORK(IU), copying result to U
3447 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3448 * (RWorkspace: need M)
3450 CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3451 $ RWORK( IE ), WORK( ITAUQ ),
3452 $ WORK( ITAUP ), WORK( IWORK ),
3453 $ LWORK-IWORK+1, IERR )
3454 CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3457 * Generate right bidiagonalizing vectors in WORK(IU)
3458 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3461 CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3462 $ WORK( ITAUP ), WORK( IWORK ),
3463 $ LWORK-IWORK+1, IERR )
3465 * Generate left bidiagonalizing vectors in U
3466 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3469 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3470 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3473 * Perform bidiagonal QR iteration, computing left
3474 * singular vectors of L in U and computing right
3475 * singular vectors of L in WORK(IU)
3476 * (CWorkspace: need M*M)
3477 * (RWorkspace: need BDSPAC)
3479 CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3480 $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
3481 $ RWORK( IRWORK ), INFO )
3483 * Multiply right singular vectors of L in WORK(IU) by
3484 * Q in VT, storing result in A
3485 * (CWorkspace: need M*M)
3488 CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3489 $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3491 * Copy right singular vectors of A from A to VT
3493 CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3497 * Insufficient workspace for a fast algorithm
3502 * Compute A=L*Q, copying result to VT
3503 * (CWorkspace: need 2*M, prefer M+M*NB)
3506 CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3507 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3508 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3511 * (CWorkspace: need M+N, prefer M+N*NB)
3514 CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3515 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3517 * Copy L to U, zeroing out above it
3519 CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3520 CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3527 * Bidiagonalize L in U
3528 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3529 * (RWorkspace: need M)
3531 CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
3532 $ WORK( ITAUQ ), WORK( ITAUP ),
3533 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3535 * Multiply right bidiagonalizing vectors in U by Q
3537 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3540 CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3541 $ WORK( ITAUP ), VT, LDVT,
3542 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3544 * Generate left bidiagonalizing vectors in U
3545 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3548 CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3549 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3552 * Perform bidiagonal QR iteration, computing left
3553 * singular vectors of A in U and computing right
3554 * singular vectors of A in VT
3556 * (RWorkspace: need BDSPAC)
3558 CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3559 $ LDVT, U, LDU, CDUM, 1,
3560 $ RWORK( IRWORK ), INFO )
3572 * Path 10t(N greater than M, but not much larger)
3573 * Reduce to bidiagonal form without LQ decomposition
3581 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3584 CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
3585 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3589 * If left singular vectors desired in U, copy result to U
3590 * and generate left bidiagonalizing vectors in U
3591 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3594 CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3595 CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3596 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3600 * If right singular vectors desired in VT, copy result to
3601 * VT and generate right bidiagonalizing vectors in VT
3602 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3605 CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3610 CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3611 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3615 * If left singular vectors desired in A, generate left
3616 * bidiagonalizing vectors in A
3617 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3620 CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3621 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3625 * If right singular vectors desired in A, generate right
3626 * bidiagonalizing vectors in A
3627 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3630 CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3631 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3634 IF( WNTUAS .OR. WNTUO )
3638 IF( WNTVAS .OR. WNTVO )
3642 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3644 * Perform bidiagonal QR iteration, if desired, computing
3645 * left singular vectors in U and computing right singular
3648 * (RWorkspace: need BDSPAC)
3650 CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3651 $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
3653 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3655 * Perform bidiagonal QR iteration, if desired, computing
3656 * left singular vectors in U and computing right singular
3659 * (RWorkspace: need BDSPAC)
3661 CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
3662 $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
3666 * Perform bidiagonal QR iteration, if desired, computing
3667 * left singular vectors in A and computing right singular
3670 * (RWorkspace: need BDSPAC)
3672 CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3673 $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
3681 * Undo scaling if necessary
3683 IF( ISCL.EQ.1 ) THEN
3684 IF( ANRM.GT.BIGNUM )
3685 $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3687 IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3688 $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
3689 $ RWORK( IE ), MINMN, IERR )
3690 IF( ANRM.LT.SMLNUM )
3691 $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3693 IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3694 $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
3695 $ RWORK( IE ), MINMN, IERR )
3698 * Return optimal workspace in WORK(1)