14 typedef long long BLASLONG;
15 typedef unsigned long long BLASULONG;
17 typedef long BLASLONG;
18 typedef unsigned long BLASULONG;
22 typedef BLASLONG blasint;
24 #define blasabs(x) llabs(x)
26 #define blasabs(x) labs(x)
30 #define blasabs(x) abs(x)
33 typedef blasint integer;
35 typedef unsigned int uinteger;
36 typedef char *address;
37 typedef short int shortint;
39 typedef double doublereal;
40 typedef struct { real r, i; } complex;
41 typedef struct { doublereal r, i; } doublecomplex;
43 static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
44 static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
45 static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
46 static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
48 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
49 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
50 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
51 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
53 #define pCf(z) (*_pCf(z))
54 #define pCd(z) (*_pCd(z))
56 typedef short int shortlogical;
57 typedef char logical1;
58 typedef char integer1;
63 /* Extern is for use with -E */
74 /*external read, write*/
83 /*internal read, write*/
113 /*rewind, backspace, endfile*/
125 ftnint *inex; /*parameters in standard's order*/
151 union Multitype { /* for multiple entry points */
162 typedef union Multitype Multitype;
164 struct Vardesc { /* for Namelist */
170 typedef struct Vardesc Vardesc;
177 typedef struct Namelist Namelist;
179 #define abs(x) ((x) >= 0 ? (x) : -(x))
180 #define dabs(x) (fabs(x))
181 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
182 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
183 #define dmin(a,b) (f2cmin(a,b))
184 #define dmax(a,b) (f2cmax(a,b))
185 #define bit_test(a,b) ((a) >> (b) & 1)
186 #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
187 #define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
189 #define abort_() { sig_die("Fortran abort routine called", 1); }
190 #define c_abs(z) (cabsf(Cf(z)))
191 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
193 #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
194 #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
196 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
197 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
199 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
200 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
201 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
202 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
203 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
204 #define d_abs(x) (fabs(*(x)))
205 #define d_acos(x) (acos(*(x)))
206 #define d_asin(x) (asin(*(x)))
207 #define d_atan(x) (atan(*(x)))
208 #define d_atn2(x, y) (atan2(*(x),*(y)))
209 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
210 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
211 #define d_cos(x) (cos(*(x)))
212 #define d_cosh(x) (cosh(*(x)))
213 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
214 #define d_exp(x) (exp(*(x)))
215 #define d_imag(z) (cimag(Cd(z)))
216 #define r_imag(z) (cimagf(Cf(z)))
217 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
218 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
219 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
220 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
221 #define d_log(x) (log(*(x)))
222 #define d_mod(x, y) (fmod(*(x), *(y)))
223 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
224 #define d_nint(x) u_nint(*(x))
225 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
226 #define d_sign(a,b) u_sign(*(a),*(b))
227 #define r_sign(a,b) u_sign(*(a),*(b))
228 #define d_sin(x) (sin(*(x)))
229 #define d_sinh(x) (sinh(*(x)))
230 #define d_sqrt(x) (sqrt(*(x)))
231 #define d_tan(x) (tan(*(x)))
232 #define d_tanh(x) (tanh(*(x)))
233 #define i_abs(x) abs(*(x))
234 #define i_dnnt(x) ((integer)u_nint(*(x)))
235 #define i_len(s, n) (n)
236 #define i_nint(x) ((integer)u_nint(*(x)))
237 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
238 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
239 #define pow_si(B,E) spow_ui(*(B),*(E))
240 #define pow_ri(B,E) spow_ui(*(B),*(E))
241 #define pow_di(B,E) dpow_ui(*(B),*(E))
242 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
243 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
244 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
245 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
246 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
247 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
248 #define sig_die(s, kill) { exit(1); }
249 #define s_stop(s, n) {exit(0);}
250 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
251 #define z_abs(z) (cabs(Cd(z)))
252 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
253 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
254 #define myexit_() break;
255 #define mycycle() continue;
256 #define myceiling(w) {ceil(w)}
257 #define myhuge(w) {HUGE_VAL}
258 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
259 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
261 /* procedure parameter types for -A and -C++ */
263 #define F2C_proc_par_types 1
265 typedef logical (*L_fp)(...);
267 typedef logical (*L_fp)();
270 static float spow_ui(float x, integer n) {
271 float pow=1.0; unsigned long int u;
273 if(n < 0) n = -n, x = 1/x;
282 static double dpow_ui(double x, integer n) {
283 double pow=1.0; unsigned long int u;
285 if(n < 0) n = -n, x = 1/x;
295 static _Fcomplex cpow_ui(complex x, integer n) {
296 complex pow={1.0,0.0}; unsigned long int u;
298 if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
300 if(u & 01) pow.r *= x.r, pow.i *= x.i;
301 if(u >>= 1) x.r *= x.r, x.i *= x.i;
305 _Fcomplex p={pow.r, pow.i};
309 static _Complex float cpow_ui(_Complex float x, integer n) {
310 _Complex float pow=1.0; unsigned long int u;
312 if(n < 0) n = -n, x = 1/x;
323 static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
324 _Dcomplex pow={1.0,0.0}; unsigned long int u;
326 if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
328 if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
329 if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
333 _Dcomplex p = {pow._Val[0], pow._Val[1]};
337 static _Complex double zpow_ui(_Complex double x, integer n) {
338 _Complex double pow=1.0; unsigned long int u;
340 if(n < 0) n = -n, x = 1/x;
350 static integer pow_ii(integer x, integer n) {
351 integer pow; unsigned long int u;
353 if (n == 0 || x == 1) pow = 1;
354 else if (x != -1) pow = x == 0 ? 1/x : 0;
357 if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
367 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
369 double m; integer i, mi;
370 for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
371 if (w[i-1]>m) mi=i ,m=w[i-1];
374 static integer smaxloc_(float *w, integer s, integer e, integer *n)
376 float m; integer i, mi;
377 for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
378 if (w[i-1]>m) mi=i ,m=w[i-1];
381 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
382 integer n = *n_, incx = *incx_, incy = *incy_, i;
384 _Fcomplex zdotc = {0.0, 0.0};
385 if (incx == 1 && incy == 1) {
386 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
387 zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
388 zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
391 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
392 zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
393 zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
399 _Complex float zdotc = 0.0;
400 if (incx == 1 && incy == 1) {
401 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
402 zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
405 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
406 zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
412 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
413 integer n = *n_, incx = *incx_, incy = *incy_, i;
415 _Dcomplex zdotc = {0.0, 0.0};
416 if (incx == 1 && incy == 1) {
417 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
418 zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
419 zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
422 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
423 zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
424 zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
430 _Complex double zdotc = 0.0;
431 if (incx == 1 && incy == 1) {
432 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
433 zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
436 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
437 zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
443 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
444 integer n = *n_, incx = *incx_, incy = *incy_, i;
446 _Fcomplex zdotc = {0.0, 0.0};
447 if (incx == 1 && incy == 1) {
448 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
449 zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
450 zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
453 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
454 zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
455 zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
461 _Complex float zdotc = 0.0;
462 if (incx == 1 && incy == 1) {
463 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
464 zdotc += Cf(&x[i]) * Cf(&y[i]);
467 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
468 zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
474 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
475 integer n = *n_, incx = *incx_, incy = *incy_, i;
477 _Dcomplex zdotc = {0.0, 0.0};
478 if (incx == 1 && incy == 1) {
479 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
480 zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
481 zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
484 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
485 zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
486 zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
492 _Complex double zdotc = 0.0;
493 if (incx == 1 && incy == 1) {
494 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
495 zdotc += Cd(&x[i]) * Cd(&y[i]);
498 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
499 zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
505 /* -- translated by f2c (version 20000121).
506 You must link the resulting object file with the libraries:
507 -lf2c -lm (in that order)
513 /* Table of constant values */
515 static integer c__6 = 6;
516 static integer c__0 = 0;
517 static integer c__2 = 2;
518 static integer c_n1 = -1;
519 static real c_b57 = 0.f;
520 static integer c__1 = 1;
521 static real c_b79 = 1.f;
523 /* > \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b> */
525 /* =========== DOCUMENTATION =========== */
527 /* Online html documentation available at */
528 /* http://www.netlib.org/lapack/explore-html/ */
531 /* > Download SGESVD + dependencies */
532 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.
535 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.
538 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.
546 /* SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */
547 /* WORK, LWORK, INFO ) */
549 /* CHARACTER JOBU, JOBVT */
550 /* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */
551 /* REAL A( LDA, * ), S( * ), U( LDU, * ), */
552 /* $ VT( LDVT, * ), WORK( * ) */
555 /* > \par Purpose: */
560 /* > SGESVD computes the singular value decomposition (SVD) of a real */
561 /* > M-by-N matrix A, optionally computing the left and/or right singular */
562 /* > vectors. The SVD is written */
564 /* > A = U * SIGMA * transpose(V) */
566 /* > where SIGMA is an M-by-N matrix which is zero except for its */
567 /* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
568 /* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */
569 /* > are the singular values of A; they are real and non-negative, and */
570 /* > are returned in descending order. The first f2cmin(m,n) columns of */
571 /* > U and V are the left and right singular vectors of A. */
573 /* > Note that the routine returns V**T, not V. */
579 /* > \param[in] JOBU */
581 /* > JOBU is CHARACTER*1 */
582 /* > Specifies options for computing all or part of the matrix U: */
583 /* > = 'A': all M columns of U are returned in array U: */
584 /* > = 'S': the first f2cmin(m,n) columns of U (the left singular */
585 /* > vectors) are returned in the array U; */
586 /* > = 'O': the first f2cmin(m,n) columns of U (the left singular */
587 /* > vectors) are overwritten on the array A; */
588 /* > = 'N': no columns of U (no left singular vectors) are */
592 /* > \param[in] JOBVT */
594 /* > JOBVT is CHARACTER*1 */
595 /* > Specifies options for computing all or part of the matrix */
597 /* > = 'A': all N rows of V**T are returned in the array VT; */
598 /* > = 'S': the first f2cmin(m,n) rows of V**T (the right singular */
599 /* > vectors) are returned in the array VT; */
600 /* > = 'O': the first f2cmin(m,n) rows of V**T (the right singular */
601 /* > vectors) are overwritten on the array A; */
602 /* > = 'N': no rows of V**T (no right singular vectors) are */
605 /* > JOBVT and JOBU cannot both be 'O'. */
611 /* > The number of rows of the input matrix A. M >= 0. */
617 /* > The number of columns of the input matrix A. N >= 0. */
620 /* > \param[in,out] A */
622 /* > A is REAL array, dimension (LDA,N) */
623 /* > On entry, the M-by-N matrix A. */
625 /* > if JOBU = 'O', A is overwritten with the first f2cmin(m,n) */
626 /* > columns of U (the left singular vectors, */
627 /* > stored columnwise); */
628 /* > if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */
629 /* > rows of V**T (the right singular vectors, */
630 /* > stored rowwise); */
631 /* > if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */
632 /* > are destroyed. */
635 /* > \param[in] LDA */
637 /* > LDA is INTEGER */
638 /* > The leading dimension of the array A. LDA >= f2cmax(1,M). */
641 /* > \param[out] S */
643 /* > S is REAL array, dimension (f2cmin(M,N)) */
644 /* > The singular values of A, sorted so that S(i) >= S(i+1). */
647 /* > \param[out] U */
649 /* > U is REAL array, dimension (LDU,UCOL) */
650 /* > (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */
651 /* > If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */
652 /* > if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */
653 /* > (the left singular vectors, stored columnwise); */
654 /* > if JOBU = 'N' or 'O', U is not referenced. */
657 /* > \param[in] LDU */
659 /* > LDU is INTEGER */
660 /* > The leading dimension of the array U. LDU >= 1; if */
661 /* > JOBU = 'S' or 'A', LDU >= M. */
664 /* > \param[out] VT */
666 /* > VT is REAL array, dimension (LDVT,N) */
667 /* > If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */
669 /* > if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */
670 /* > V**T (the right singular vectors, stored rowwise); */
671 /* > if JOBVT = 'N' or 'O', VT is not referenced. */
674 /* > \param[in] LDVT */
676 /* > LDVT is INTEGER */
677 /* > The leading dimension of the array VT. LDVT >= 1; if */
678 /* > JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */
681 /* > \param[out] WORK */
683 /* > WORK is REAL array, dimension (MAX(1,LWORK)) */
684 /* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
685 /* > if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */
686 /* > superdiagonal elements of an upper bidiagonal matrix B */
687 /* > whose diagonal is in S (not necessarily sorted). B */
688 /* > satisfies A = U * B * VT, so it has the same singular values */
689 /* > as A, and singular vectors related by U and VT. */
692 /* > \param[in] LWORK */
694 /* > LWORK is INTEGER */
695 /* > The dimension of the array WORK. */
696 /* > LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */
697 /* > - PATH 1 (M much larger than N, JOBU='N') */
698 /* > - PATH 1t (N much larger than M, JOBVT='N') */
699 /* > LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths */
700 /* > For good performance, LWORK should generally be larger. */
702 /* > If LWORK = -1, then a workspace query is assumed; the routine */
703 /* > only calculates the optimal size of the WORK array, returns */
704 /* > this value as the first entry of the WORK array, and no error */
705 /* > message related to LWORK is issued by XERBLA. */
708 /* > \param[out] INFO */
710 /* > INFO is INTEGER */
711 /* > = 0: successful exit. */
712 /* > < 0: if INFO = -i, the i-th argument had an illegal value. */
713 /* > > 0: if SBDSQR did not converge, INFO specifies how many */
714 /* > superdiagonals of an intermediate bidiagonal form B */
715 /* > did not converge to zero. See the description of WORK */
716 /* > above for details. */
722 /* > \author Univ. of Tennessee */
723 /* > \author Univ. of California Berkeley */
724 /* > \author Univ. of Colorado Denver */
725 /* > \author NAG Ltd. */
727 /* > \date April 2012 */
729 /* > \ingroup realGEsing */
731 /* ===================================================================== */
732 /* Subroutine */ int sgesvd_(char *jobu, char *jobvt, integer *m, integer *n,
733 real *a, integer *lda, real *s, real *u, integer *ldu, real *vt,
734 integer *ldvt, real *work, integer *lwork, integer *info)
736 /* System generated locals */
738 integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2],
742 /* Local variables */
745 integer ierr, itau, ncvt, nrvt, lwork_sgebrd__, lwork_sgelqf__,
747 extern logical lsame_(char *, char *);
749 extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
750 integer *, real *, real *, integer *, real *, integer *, real *,
752 integer minmn, wrkbl, itaup, itauq, mnthr, iwork;
753 logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
754 integer ie, ir, bdspac, iu;
755 extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer
756 *, real *, real *, real *, real *, real *, integer *, integer *);
757 extern real slamch_(char *), slange_(char *, integer *, integer *,
758 real *, integer *, real *);
759 extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
760 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
761 integer *, integer *, ftnlen, ftnlen);
763 extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer
764 *, real *, real *, integer *, integer *), slascl_(char *, integer
765 *, integer *, real *, real *, integer *, integer *, real *,
766 integer *, integer *), sgeqrf_(integer *, integer *, real
767 *, integer *, real *, real *, integer *, integer *), slacpy_(char
768 *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *,
769 real *, integer *), sbdsqr_(char *, integer *, integer *,
770 integer *, integer *, real *, real *, real *, integer *, real *,
771 integer *, real *, integer *, real *, integer *), sorgbr_(
772 char *, integer *, integer *, integer *, real *, integer *, real *
773 , real *, integer *, integer *), sormbr_(char *, char *,
774 char *, integer *, integer *, integer *, real *, integer *, real *
775 , real *, integer *, real *, integer *, integer *);
776 integer ldwrkr, minwrk, ldwrku, maxwrk;
777 extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real
778 *, integer *, real *, real *, integer *, integer *);
780 extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real
781 *, integer *, real *, real *, integer *, integer *);
782 logical lquery, wntuas, wntvas;
783 integer blk, lwork_sorgbr_p__, lwork_sorgbr_q__, lwork_sorglq_m__,
784 lwork_sorglq_n__, ncu, lwork_sorgqr_n__, lwork_sorgqr_m__;
789 /* -- LAPACK driver routine (version 3.7.0) -- */
790 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
791 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
795 /* ===================================================================== */
798 /* Test the input arguments */
800 /* Parameter adjustments */
802 a_offset = 1 + a_dim1 * 1;
806 u_offset = 1 + u_dim1 * 1;
809 vt_offset = 1 + vt_dim1 * 1;
815 minmn = f2cmin(*m,*n);
816 wntua = lsame_(jobu, "A");
817 wntus = lsame_(jobu, "S");
818 wntuas = wntua || wntus;
819 wntuo = lsame_(jobu, "O");
820 wntun = lsame_(jobu, "N");
821 wntva = lsame_(jobvt, "A");
822 wntvs = lsame_(jobvt, "S");
823 wntvas = wntva || wntvs;
824 wntvo = lsame_(jobvt, "O");
825 wntvn = lsame_(jobvt, "N");
826 lquery = *lwork == -1;
828 if (! (wntua || wntus || wntuo || wntun)) {
830 } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
836 } else if (*lda < f2cmax(1,*m)) {
838 } else if (*ldu < 1 || wntuas && *ldu < *m) {
840 } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
844 /* Compute workspace */
845 /* (Note: Comments in the code beginning "Workspace:" describe the */
846 /* minimal amount of workspace needed at that point in the code, */
847 /* as well as the preferred amount for good performance. */
848 /* NB refers to the optimal block size for the immediately */
849 /* following subroutine, as returned by ILAENV.) */
854 if (*m >= *n && minmn > 0) {
856 /* Compute space needed for SBDSQR */
858 /* Writing concatenation */
859 i__1[0] = 1, a__1[0] = jobu;
860 i__1[1] = 1, a__1[1] = jobvt;
861 s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
862 mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
863 ftnlen)6, (ftnlen)2);
865 /* Compute space needed for SGEQRF */
866 sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
867 lwork_sgeqrf__ = (integer) dum[0];
868 /* Compute space needed for SORGQR */
869 sorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
870 lwork_sorgqr_n__ = (integer) dum[0];
871 sorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
872 lwork_sorgqr_m__ = (integer) dum[0];
873 /* Compute space needed for SGEBRD */
874 sgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
876 lwork_sgebrd__ = (integer) dum[0];
877 /* Compute space needed for SORGBR P */
878 sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
879 lwork_sorgbr_p__ = (integer) dum[0];
880 /* Compute space needed for SORGBR Q */
881 sorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
882 lwork_sorgbr_q__ = (integer) dum[0];
887 /* Path 1 (M much larger than N, JOBU='N') */
889 maxwrk = *n + lwork_sgeqrf__;
891 i__2 = maxwrk, i__3 = *n * 3 + lwork_sgebrd__;
892 maxwrk = f2cmax(i__2,i__3);
893 if (wntvo || wntvas) {
895 i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
896 maxwrk = f2cmax(i__2,i__3);
898 maxwrk = f2cmax(maxwrk,bdspac);
901 minwrk = f2cmax(i__2,bdspac);
902 } else if (wntuo && wntvn) {
904 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
906 wrkbl = *n + lwork_sgeqrf__;
908 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
909 wrkbl = f2cmax(i__2,i__3);
911 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
912 wrkbl = f2cmax(i__2,i__3);
914 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
915 wrkbl = f2cmax(i__2,i__3);
916 wrkbl = f2cmax(wrkbl,bdspac);
918 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
919 maxwrk = f2cmax(i__2,i__3);
922 minwrk = f2cmax(i__2,bdspac);
923 } else if (wntuo && wntvas) {
925 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */
928 wrkbl = *n + lwork_sgeqrf__;
930 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
931 wrkbl = f2cmax(i__2,i__3);
933 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
934 wrkbl = f2cmax(i__2,i__3);
936 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
937 wrkbl = f2cmax(i__2,i__3);
939 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
940 wrkbl = f2cmax(i__2,i__3);
941 wrkbl = f2cmax(wrkbl,bdspac);
943 i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
944 maxwrk = f2cmax(i__2,i__3);
947 minwrk = f2cmax(i__2,bdspac);
948 } else if (wntus && wntvn) {
950 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
952 wrkbl = *n + lwork_sgeqrf__;
954 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
955 wrkbl = f2cmax(i__2,i__3);
957 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
958 wrkbl = f2cmax(i__2,i__3);
960 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
961 wrkbl = f2cmax(i__2,i__3);
962 wrkbl = f2cmax(wrkbl,bdspac);
963 maxwrk = *n * *n + wrkbl;
966 minwrk = f2cmax(i__2,bdspac);
967 } else if (wntus && wntvo) {
969 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
971 wrkbl = *n + lwork_sgeqrf__;
973 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
974 wrkbl = f2cmax(i__2,i__3);
976 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
977 wrkbl = f2cmax(i__2,i__3);
979 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
980 wrkbl = f2cmax(i__2,i__3);
982 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
983 wrkbl = f2cmax(i__2,i__3);
984 wrkbl = f2cmax(wrkbl,bdspac);
985 maxwrk = (*n << 1) * *n + wrkbl;
988 minwrk = f2cmax(i__2,bdspac);
989 } else if (wntus && wntvas) {
991 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
994 wrkbl = *n + lwork_sgeqrf__;
996 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
997 wrkbl = f2cmax(i__2,i__3);
999 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
1000 wrkbl = f2cmax(i__2,i__3);
1002 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
1003 wrkbl = f2cmax(i__2,i__3);
1005 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
1006 wrkbl = f2cmax(i__2,i__3);
1007 wrkbl = f2cmax(wrkbl,bdspac);
1008 maxwrk = *n * *n + wrkbl;
1011 minwrk = f2cmax(i__2,bdspac);
1012 } else if (wntua && wntvn) {
1014 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
1016 wrkbl = *n + lwork_sgeqrf__;
1018 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
1019 wrkbl = f2cmax(i__2,i__3);
1021 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
1022 wrkbl = f2cmax(i__2,i__3);
1024 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
1025 wrkbl = f2cmax(i__2,i__3);
1026 wrkbl = f2cmax(wrkbl,bdspac);
1027 maxwrk = *n * *n + wrkbl;
1030 minwrk = f2cmax(i__2,bdspac);
1031 } else if (wntua && wntvo) {
1033 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
1035 wrkbl = *n + lwork_sgeqrf__;
1037 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
1038 wrkbl = f2cmax(i__2,i__3);
1040 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
1041 wrkbl = f2cmax(i__2,i__3);
1043 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
1044 wrkbl = f2cmax(i__2,i__3);
1046 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
1047 wrkbl = f2cmax(i__2,i__3);
1048 wrkbl = f2cmax(wrkbl,bdspac);
1049 maxwrk = (*n << 1) * *n + wrkbl;
1052 minwrk = f2cmax(i__2,bdspac);
1053 } else if (wntua && wntvas) {
1055 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */
1058 wrkbl = *n + lwork_sgeqrf__;
1060 i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
1061 wrkbl = f2cmax(i__2,i__3);
1063 i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
1064 wrkbl = f2cmax(i__2,i__3);
1066 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
1067 wrkbl = f2cmax(i__2,i__3);
1069 i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
1070 wrkbl = f2cmax(i__2,i__3);
1071 wrkbl = f2cmax(wrkbl,bdspac);
1072 maxwrk = *n * *n + wrkbl;
1075 minwrk = f2cmax(i__2,bdspac);
1079 /* Path 10 (M at least N, but not much larger) */
1081 sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
1083 lwork_sgebrd__ = (integer) dum[0];
1084 maxwrk = *n * 3 + lwork_sgebrd__;
1085 if (wntus || wntuo) {
1086 sorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1,
1088 lwork_sorgbr_q__ = (integer) dum[0];
1090 i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
1091 maxwrk = f2cmax(i__2,i__3);
1094 sorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1,
1096 lwork_sorgbr_q__ = (integer) dum[0];
1098 i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
1099 maxwrk = f2cmax(i__2,i__3);
1103 i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
1104 maxwrk = f2cmax(i__2,i__3);
1106 maxwrk = f2cmax(maxwrk,bdspac);
1109 minwrk = f2cmax(i__2,bdspac);
1111 } else if (minmn > 0) {
1113 /* Compute space needed for SBDSQR */
1115 /* Writing concatenation */
1116 i__1[0] = 1, a__1[0] = jobu;
1117 i__1[1] = 1, a__1[1] = jobvt;
1118 s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
1119 mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
1120 ftnlen)6, (ftnlen)2);
1122 /* Compute space needed for SGELQF */
1123 sgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
1124 lwork_sgelqf__ = (integer) dum[0];
1125 /* Compute space needed for SORGLQ */
1126 sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
1127 lwork_sorglq_n__ = (integer) dum[0];
1128 sorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
1129 lwork_sorglq_m__ = (integer) dum[0];
1130 /* Compute space needed for SGEBRD */
1131 sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
1133 lwork_sgebrd__ = (integer) dum[0];
1134 /* Compute space needed for SORGBR P */
1135 sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
1136 lwork_sorgbr_p__ = (integer) dum[0];
1137 /* Compute space needed for SORGBR Q */
1138 sorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
1139 lwork_sorgbr_q__ = (integer) dum[0];
1143 /* Path 1t(N much larger than M, JOBVT='N') */
1145 maxwrk = *m + lwork_sgelqf__;
1147 i__2 = maxwrk, i__3 = *m * 3 + lwork_sgebrd__;
1148 maxwrk = f2cmax(i__2,i__3);
1149 if (wntuo || wntuas) {
1151 i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
1152 maxwrk = f2cmax(i__2,i__3);
1154 maxwrk = f2cmax(maxwrk,bdspac);
1157 minwrk = f2cmax(i__2,bdspac);
1158 } else if (wntvo && wntun) {
1160 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
1162 wrkbl = *m + lwork_sgelqf__;
1164 i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1165 wrkbl = f2cmax(i__2,i__3);
1167 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1168 wrkbl = f2cmax(i__2,i__3);
1170 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1171 wrkbl = f2cmax(i__2,i__3);
1172 wrkbl = f2cmax(wrkbl,bdspac);
1174 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
1175 maxwrk = f2cmax(i__2,i__3);
1178 minwrk = f2cmax(i__2,bdspac);
1179 } else if (wntvo && wntuas) {
1181 /* Path 3t(N much larger than M, JOBU='S' or 'A', */
1184 wrkbl = *m + lwork_sgelqf__;
1186 i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1187 wrkbl = f2cmax(i__2,i__3);
1189 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1190 wrkbl = f2cmax(i__2,i__3);
1192 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1193 wrkbl = f2cmax(i__2,i__3);
1195 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1196 wrkbl = f2cmax(i__2,i__3);
1197 wrkbl = f2cmax(wrkbl,bdspac);
1199 i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
1200 maxwrk = f2cmax(i__2,i__3);
1203 minwrk = f2cmax(i__2,bdspac);
1204 } else if (wntvs && wntun) {
1206 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
1208 wrkbl = *m + lwork_sgelqf__;
1210 i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1211 wrkbl = f2cmax(i__2,i__3);
1213 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1214 wrkbl = f2cmax(i__2,i__3);
1216 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1217 wrkbl = f2cmax(i__2,i__3);
1218 wrkbl = f2cmax(wrkbl,bdspac);
1219 maxwrk = *m * *m + wrkbl;
1222 minwrk = f2cmax(i__2,bdspac);
1223 } else if (wntvs && wntuo) {
1225 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
1227 wrkbl = *m + lwork_sgelqf__;
1229 i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1230 wrkbl = f2cmax(i__2,i__3);
1232 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1233 wrkbl = f2cmax(i__2,i__3);
1235 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1236 wrkbl = f2cmax(i__2,i__3);
1238 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1239 wrkbl = f2cmax(i__2,i__3);
1240 wrkbl = f2cmax(wrkbl,bdspac);
1241 maxwrk = (*m << 1) * *m + wrkbl;
1244 minwrk = f2cmax(i__2,bdspac);
1245 maxwrk = f2cmax(maxwrk,minwrk);
1246 } else if (wntvs && wntuas) {
1248 /* Path 6t(N much larger than M, JOBU='S' or 'A', */
1251 wrkbl = *m + lwork_sgelqf__;
1253 i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1254 wrkbl = f2cmax(i__2,i__3);
1256 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1257 wrkbl = f2cmax(i__2,i__3);
1259 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1260 wrkbl = f2cmax(i__2,i__3);
1262 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1263 wrkbl = f2cmax(i__2,i__3);
1264 wrkbl = f2cmax(wrkbl,bdspac);
1265 maxwrk = *m * *m + wrkbl;
1268 minwrk = f2cmax(i__2,bdspac);
1269 } else if (wntva && wntun) {
1271 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
1273 wrkbl = *m + lwork_sgelqf__;
1275 i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1276 wrkbl = f2cmax(i__2,i__3);
1278 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1279 wrkbl = f2cmax(i__2,i__3);
1281 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1282 wrkbl = f2cmax(i__2,i__3);
1283 wrkbl = f2cmax(wrkbl,bdspac);
1284 maxwrk = *m * *m + wrkbl;
1287 minwrk = f2cmax(i__2,bdspac);
1288 } else if (wntva && wntuo) {
1290 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
1292 wrkbl = *m + lwork_sgelqf__;
1294 i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1295 wrkbl = f2cmax(i__2,i__3);
1297 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1298 wrkbl = f2cmax(i__2,i__3);
1300 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1301 wrkbl = f2cmax(i__2,i__3);
1303 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1304 wrkbl = f2cmax(i__2,i__3);
1305 wrkbl = f2cmax(wrkbl,bdspac);
1306 maxwrk = (*m << 1) * *m + wrkbl;
1309 minwrk = f2cmax(i__2,bdspac);
1310 } else if (wntva && wntuas) {
1312 /* Path 9t(N much larger than M, JOBU='S' or 'A', */
1315 wrkbl = *m + lwork_sgelqf__;
1317 i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1318 wrkbl = f2cmax(i__2,i__3);
1320 i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1321 wrkbl = f2cmax(i__2,i__3);
1323 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1324 wrkbl = f2cmax(i__2,i__3);
1326 i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1327 wrkbl = f2cmax(i__2,i__3);
1328 wrkbl = f2cmax(wrkbl,bdspac);
1329 maxwrk = *m * *m + wrkbl;
1332 minwrk = f2cmax(i__2,bdspac);
1336 /* Path 10t(N greater than M, but not much larger) */
1338 sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
1340 lwork_sgebrd__ = (integer) dum[0];
1341 maxwrk = *m * 3 + lwork_sgebrd__;
1342 if (wntvs || wntvo) {
1343 /* Compute space needed for SORGBR P */
1344 sorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, &
1346 lwork_sorgbr_p__ = (integer) dum[0];
1348 i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
1349 maxwrk = f2cmax(i__2,i__3);
1352 sorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, &
1354 lwork_sorgbr_p__ = (integer) dum[0];
1356 i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
1357 maxwrk = f2cmax(i__2,i__3);
1361 i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
1362 maxwrk = f2cmax(i__2,i__3);
1364 maxwrk = f2cmax(maxwrk,bdspac);
1367 minwrk = f2cmax(i__2,bdspac);
1370 maxwrk = f2cmax(maxwrk,minwrk);
1371 work[1] = (real) maxwrk;
1373 if (*lwork < minwrk && ! lquery) {
1380 xerbla_("SGESVD", &i__2, (ftnlen)6);
1382 } else if (lquery) {
1386 /* Quick return if possible */
1388 if (*m == 0 || *n == 0) {
1392 /* Get machine constants */
1395 smlnum = sqrt(slamch_("S")) / eps;
1396 bignum = 1.f / smlnum;
1398 /* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
1400 anrm = slange_("M", m, n, &a[a_offset], lda, dum);
1402 if (anrm > 0.f && anrm < smlnum) {
1404 slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
1406 } else if (anrm > bignum) {
1408 slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
1414 /* A has at least as many rows as columns. If A has sufficiently */
1415 /* more rows than columns, first reduce using the QR */
1416 /* decomposition (if sufficient workspace available) */
1422 /* Path 1 (M much larger than N, JOBU='N') */
1423 /* No left singular vectors to be computed */
1429 /* (Workspace: need 2*N, prefer N+N*NB) */
1431 i__2 = *lwork - iwork + 1;
1432 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
1435 /* Zero out below R */
1440 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2],
1448 /* Bidiagonalize R in A */
1449 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1451 i__2 = *lwork - iwork + 1;
1452 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
1453 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
1455 if (wntvo || wntvas) {
1457 /* If right singular vectors desired, generate P'. */
1458 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
1460 i__2 = *lwork - iwork + 1;
1461 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
1462 work[iwork], &i__2, &ierr);
1467 /* Perform bidiagonal QR iteration, computing right */
1468 /* singular vectors of A in A if desired */
1469 /* (Workspace: need BDSPAC) */
1471 sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
1472 a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork],
1475 /* If right singular vectors desired in VT, copy them there */
1478 slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset],
1482 } else if (wntuo && wntvn) {
1484 /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
1485 /* N left singular vectors to be overwritten on A and */
1486 /* no right singular vectors to be computed */
1490 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
1492 /* Sufficient workspace for a fast algorithm */
1496 i__2 = wrkbl, i__3 = *lda * *n + *n;
1497 if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
1499 /* WORK(IU) is LDA by N, WORK(IR) is LDA by N */
1503 } else /* if(complicated condition) */ {
1505 i__2 = wrkbl, i__3 = *lda * *n + *n;
1506 if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
1508 /* WORK(IU) is LDA by N, WORK(IR) is N by N */
1514 /* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
1516 ldwrku = (*lwork - *n * *n - *n) / *n;
1520 itau = ir + ldwrkr * *n;
1524 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1526 i__2 = *lwork - iwork + 1;
1527 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1530 /* Copy R to WORK(IR) and zero out below it */
1532 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
1535 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1],
1538 /* Generate Q in A */
1539 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1541 i__2 = *lwork - iwork + 1;
1542 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1543 iwork], &i__2, &ierr);
1549 /* Bidiagonalize R in WORK(IR) */
1550 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1552 i__2 = *lwork - iwork + 1;
1553 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
1554 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
1556 /* Generate left vectors bidiagonalizing R */
1557 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1559 i__2 = *lwork - iwork + 1;
1560 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1561 work[iwork], &i__2, &ierr);
1564 /* Perform bidiagonal QR iteration, computing left */
1565 /* singular vectors of R in WORK(IR) */
1566 /* (Workspace: need N*N+BDSPAC) */
1568 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
1569 c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
1573 /* Multiply Q in A by left singular vectors of R in */
1574 /* WORK(IR), storing result in WORK(IU) and copying to A */
1575 /* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
1579 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
1582 i__4 = *m - i__ + 1;
1583 chunk = f2cmin(i__4,ldwrku);
1584 sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ +
1585 a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
1587 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
1594 /* Insufficient workspace for a fast algorithm */
1601 /* Bidiagonalize A */
1602 /* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
1604 i__3 = *lwork - iwork + 1;
1605 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
1606 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
1608 /* Generate left vectors bidiagonalizing A */
1609 /* (Workspace: need 4*N, prefer 3*N+N*NB) */
1611 i__3 = *lwork - iwork + 1;
1612 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
1613 work[iwork], &i__3, &ierr);
1616 /* Perform bidiagonal QR iteration, computing left */
1617 /* singular vectors of A in A */
1618 /* (Workspace: need BDSPAC) */
1620 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
1621 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
1626 } else if (wntuo && wntvas) {
1628 /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */
1629 /* N left singular vectors to be overwritten on A and */
1630 /* N right singular vectors to be computed in VT */
1634 if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {
1636 /* Sufficient workspace for a fast algorithm */
1640 i__3 = wrkbl, i__2 = *lda * *n + *n;
1641 if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
1643 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1647 } else /* if(complicated condition) */ {
1649 i__3 = wrkbl, i__2 = *lda * *n + *n;
1650 if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
1652 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
1658 /* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
1660 ldwrku = (*lwork - *n * *n - *n) / *n;
1664 itau = ir + ldwrkr * *n;
1668 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1670 i__3 = *lwork - iwork + 1;
1671 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1674 /* Copy R to VT, zeroing out below it */
1676 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
1681 slaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[
1682 vt_dim1 + 2], ldvt);
1685 /* Generate Q in A */
1686 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1688 i__3 = *lwork - iwork + 1;
1689 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1690 iwork], &i__3, &ierr);
1696 /* Bidiagonalize R in VT, copying result to WORK(IR) */
1697 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1699 i__3 = *lwork - iwork + 1;
1700 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
1701 work[itauq], &work[itaup], &work[iwork], &i__3, &
1703 slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
1706 /* Generate left vectors bidiagonalizing R in WORK(IR) */
1707 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1709 i__3 = *lwork - iwork + 1;
1710 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1711 work[iwork], &i__3, &ierr);
1713 /* Generate right vectors bidiagonalizing R in VT */
1714 /* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */
1716 i__3 = *lwork - iwork + 1;
1717 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup],
1718 &work[iwork], &i__3, &ierr);
1721 /* Perform bidiagonal QR iteration, computing left */
1722 /* singular vectors of R in WORK(IR) and computing right */
1723 /* singular vectors of R in VT */
1724 /* (Workspace: need N*N+BDSPAC) */
1726 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
1727 vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1,
1728 &work[iwork], info);
1731 /* Multiply Q in A by left singular vectors of R in */
1732 /* WORK(IR), storing result in WORK(IU) and copying to A */
1733 /* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
1737 for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
1740 i__4 = *m - i__ + 1;
1741 chunk = f2cmin(i__4,ldwrku);
1742 sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ +
1743 a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
1745 slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
1752 /* Insufficient workspace for a fast algorithm */
1758 /* (Workspace: need 2*N, prefer N+N*NB) */
1760 i__2 = *lwork - iwork + 1;
1761 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1764 /* Copy R to VT, zeroing out below it */
1766 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
1771 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
1772 vt_dim1 + 2], ldvt);
1775 /* Generate Q in A */
1776 /* (Workspace: need 2*N, prefer N+N*NB) */
1778 i__2 = *lwork - iwork + 1;
1779 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1780 iwork], &i__2, &ierr);
1786 /* Bidiagonalize R in VT */
1787 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1789 i__2 = *lwork - iwork + 1;
1790 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
1791 work[itauq], &work[itaup], &work[iwork], &i__2, &
1794 /* Multiply Q in A by left vectors bidiagonalizing R */
1795 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
1797 i__2 = *lwork - iwork + 1;
1798 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
1799 work[itauq], &a[a_offset], lda, &work[iwork], &
1802 /* Generate right vectors bidiagonalizing R in VT */
1803 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
1805 i__2 = *lwork - iwork + 1;
1806 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup],
1807 &work[iwork], &i__2, &ierr);
1810 /* Perform bidiagonal QR iteration, computing left */
1811 /* singular vectors of A in A and computing right */
1812 /* singular vectors of A in VT */
1813 /* (Workspace: need BDSPAC) */
1815 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
1816 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
1825 /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
1826 /* N left singular vectors to be computed in U and */
1827 /* no right singular vectors to be computed */
1831 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
1833 /* Sufficient workspace for a fast algorithm */
1836 if (*lwork >= wrkbl + *lda * *n) {
1838 /* WORK(IR) is LDA by N */
1843 /* WORK(IR) is N by N */
1847 itau = ir + ldwrkr * *n;
1851 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1853 i__2 = *lwork - iwork + 1;
1854 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1855 iwork], &i__2, &ierr);
1857 /* Copy R to WORK(IR), zeroing out below it */
1859 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
1863 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir +
1866 /* Generate Q in A */
1867 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1869 i__2 = *lwork - iwork + 1;
1870 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
1871 work[iwork], &i__2, &ierr);
1877 /* Bidiagonalize R in WORK(IR) */
1878 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1880 i__2 = *lwork - iwork + 1;
1881 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
1882 work[itauq], &work[itaup], &work[iwork], &
1885 /* Generate left vectors bidiagonalizing R in WORK(IR) */
1886 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1888 i__2 = *lwork - iwork + 1;
1889 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
1890 , &work[iwork], &i__2, &ierr);
1893 /* Perform bidiagonal QR iteration, computing left */
1894 /* singular vectors of R in WORK(IR) */
1895 /* (Workspace: need N*N+BDSPAC) */
1897 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie],
1898 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
1901 /* Multiply Q in A by left singular vectors of R in */
1902 /* WORK(IR), storing result in U */
1903 /* (Workspace: need N*N) */
1905 sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
1906 work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu);
1910 /* Insufficient workspace for a fast algorithm */
1915 /* Compute A=Q*R, copying result to U */
1916 /* (Workspace: need 2*N, prefer N+N*NB) */
1918 i__2 = *lwork - iwork + 1;
1919 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1920 iwork], &i__2, &ierr);
1921 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
1924 /* Generate Q in U */
1925 /* (Workspace: need 2*N, prefer N+N*NB) */
1927 i__2 = *lwork - iwork + 1;
1928 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
1929 work[iwork], &i__2, &ierr);
1935 /* Zero out below R in A */
1940 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
1944 /* Bidiagonalize R in A */
1945 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1947 i__2 = *lwork - iwork + 1;
1948 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
1949 work[itauq], &work[itaup], &work[iwork], &
1952 /* Multiply Q in U by left vectors bidiagonalizing R */
1953 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
1955 i__2 = *lwork - iwork + 1;
1956 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
1957 work[itauq], &u[u_offset], ldu, &work[iwork],
1962 /* Perform bidiagonal QR iteration, computing left */
1963 /* singular vectors of A in U */
1964 /* (Workspace: need BDSPAC) */
1966 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie],
1967 dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
1974 /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
1975 /* N left singular vectors to be computed in U and */
1976 /* N right singular vectors to be overwritten on A */
1980 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
1982 /* Sufficient workspace for a fast algorithm */
1985 if (*lwork >= wrkbl + (*lda << 1) * *n) {
1987 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1990 ir = iu + ldwrku * *n;
1992 } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
1994 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
1997 ir = iu + ldwrku * *n;
2001 /* WORK(IU) is N by N and WORK(IR) is N by N */
2004 ir = iu + ldwrku * *n;
2007 itau = ir + ldwrkr * *n;
2011 /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2013 i__2 = *lwork - iwork + 1;
2014 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2015 iwork], &i__2, &ierr);
2017 /* Copy R to WORK(IU), zeroing out below it */
2019 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2023 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
2026 /* Generate Q in A */
2027 /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2029 i__2 = *lwork - iwork + 1;
2030 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
2031 work[iwork], &i__2, &ierr);
2037 /* Bidiagonalize R in WORK(IU), copying result to */
2039 /* (Workspace: need 2*N*N+4*N, */
2040 /* prefer 2*N*N+3*N+2*N*NB) */
2042 i__2 = *lwork - iwork + 1;
2043 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2044 work[itauq], &work[itaup], &work[iwork], &
2046 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
2049 /* Generate left bidiagonalizing vectors in WORK(IU) */
2050 /* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
2052 i__2 = *lwork - iwork + 1;
2053 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2054 , &work[iwork], &i__2, &ierr);
2056 /* Generate right bidiagonalizing vectors in WORK(IR) */
2057 /* (Workspace: need 2*N*N+4*N-1, */
2058 /* prefer 2*N*N+3*N+(N-1)*NB) */
2060 i__2 = *lwork - iwork + 1;
2061 sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
2062 , &work[iwork], &i__2, &ierr);
2065 /* Perform bidiagonal QR iteration, computing left */
2066 /* singular vectors of R in WORK(IU) and computing */
2067 /* right singular vectors of R in WORK(IR) */
2068 /* (Workspace: need 2*N*N+BDSPAC) */
2070 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
2071 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1,
2072 &work[iwork], info);
2074 /* Multiply Q in A by left singular vectors of R in */
2075 /* WORK(IU), storing result in U */
2076 /* (Workspace: need N*N) */
2078 sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
2079 work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
2081 /* Copy right singular vectors of R to A */
2082 /* (Workspace: need N*N) */
2084 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset],
2089 /* Insufficient workspace for a fast algorithm */
2094 /* Compute A=Q*R, copying result to U */
2095 /* (Workspace: need 2*N, prefer N+N*NB) */
2097 i__2 = *lwork - iwork + 1;
2098 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2099 iwork], &i__2, &ierr);
2100 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2103 /* Generate Q in U */
2104 /* (Workspace: need 2*N, prefer N+N*NB) */
2106 i__2 = *lwork - iwork + 1;
2107 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
2108 work[iwork], &i__2, &ierr);
2114 /* Zero out below R in A */
2119 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
2123 /* Bidiagonalize R in A */
2124 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2126 i__2 = *lwork - iwork + 1;
2127 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
2128 work[itauq], &work[itaup], &work[iwork], &
2131 /* Multiply Q in U by left vectors bidiagonalizing R */
2132 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2134 i__2 = *lwork - iwork + 1;
2135 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2136 work[itauq], &u[u_offset], ldu, &work[iwork],
2140 /* Generate right vectors bidiagonalizing R in A */
2141 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2143 i__2 = *lwork - iwork + 1;
2144 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
2145 &work[iwork], &i__2, &ierr);
2148 /* Perform bidiagonal QR iteration, computing left */
2149 /* singular vectors of A in U and computing right */
2150 /* singular vectors of A in A */
2151 /* (Workspace: need BDSPAC) */
2153 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
2154 a_offset], lda, &u[u_offset], ldu, dum, &c__1,
2155 &work[iwork], info);
2159 } else if (wntvas) {
2161 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */
2163 /* N left singular vectors to be computed in U and */
2164 /* N right singular vectors to be computed in VT */
2168 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
2170 /* Sufficient workspace for a fast algorithm */
2173 if (*lwork >= wrkbl + *lda * *n) {
2175 /* WORK(IU) is LDA by N */
2180 /* WORK(IU) is N by N */
2184 itau = iu + ldwrku * *n;
2188 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2190 i__2 = *lwork - iwork + 1;
2191 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2192 iwork], &i__2, &ierr);
2194 /* Copy R to WORK(IU), zeroing out below it */
2196 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2200 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
2203 /* Generate Q in A */
2204 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2206 i__2 = *lwork - iwork + 1;
2207 sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
2208 work[iwork], &i__2, &ierr);
2214 /* Bidiagonalize R in WORK(IU), copying result to VT */
2215 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
2217 i__2 = *lwork - iwork + 1;
2218 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2219 work[itauq], &work[itaup], &work[iwork], &
2221 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
2224 /* Generate left bidiagonalizing vectors in WORK(IU) */
2225 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
2227 i__2 = *lwork - iwork + 1;
2228 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2229 , &work[iwork], &i__2, &ierr);
2231 /* Generate right bidiagonalizing vectors in VT */
2232 /* (Workspace: need N*N+4*N-1, */
2233 /* prefer N*N+3*N+(N-1)*NB) */
2235 i__2 = *lwork - iwork + 1;
2236 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2237 itaup], &work[iwork], &i__2, &ierr)
2241 /* Perform bidiagonal QR iteration, computing left */
2242 /* singular vectors of R in WORK(IU) and computing */
2243 /* right singular vectors of R in VT */
2244 /* (Workspace: need N*N+BDSPAC) */
2246 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
2247 vt_offset], ldvt, &work[iu], &ldwrku, dum, &
2248 c__1, &work[iwork], info);
2250 /* Multiply Q in A by left singular vectors of R in */
2251 /* WORK(IU), storing result in U */
2252 /* (Workspace: need N*N) */
2254 sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
2255 work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
2259 /* Insufficient workspace for a fast algorithm */
2264 /* Compute A=Q*R, copying result to U */
2265 /* (Workspace: need 2*N, prefer N+N*NB) */
2267 i__2 = *lwork - iwork + 1;
2268 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2269 iwork], &i__2, &ierr);
2270 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2273 /* Generate Q in U */
2274 /* (Workspace: need 2*N, prefer N+N*NB) */
2276 i__2 = *lwork - iwork + 1;
2277 sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
2278 work[iwork], &i__2, &ierr);
2280 /* Copy R to VT, zeroing out below it */
2282 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
2287 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
2288 vt_dim1 + 2], ldvt);
2295 /* Bidiagonalize R in VT */
2296 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2298 i__2 = *lwork - iwork + 1;
2299 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie],
2300 &work[itauq], &work[itaup], &work[iwork], &
2303 /* Multiply Q in U by left bidiagonalizing vectors */
2305 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2307 i__2 = *lwork - iwork + 1;
2308 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt,
2309 &work[itauq], &u[u_offset], ldu, &work[iwork],
2312 /* Generate right bidiagonalizing vectors in VT */
2313 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2315 i__2 = *lwork - iwork + 1;
2316 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2317 itaup], &work[iwork], &i__2, &ierr)
2321 /* Perform bidiagonal QR iteration, computing left */
2322 /* singular vectors of A in U and computing right */
2323 /* singular vectors of A in VT */
2324 /* (Workspace: need BDSPAC) */
2326 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
2327 vt_offset], ldvt, &u[u_offset], ldu, dum, &
2328 c__1, &work[iwork], info);
2338 /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
2339 /* M left singular vectors to be computed in U and */
2340 /* no right singular vectors to be computed */
2343 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2344 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
2346 /* Sufficient workspace for a fast algorithm */
2349 if (*lwork >= wrkbl + *lda * *n) {
2351 /* WORK(IR) is LDA by N */
2356 /* WORK(IR) is N by N */
2360 itau = ir + ldwrkr * *n;
2363 /* Compute A=Q*R, copying result to U */
2364 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2366 i__2 = *lwork - iwork + 1;
2367 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2368 iwork], &i__2, &ierr);
2369 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2372 /* Copy R to WORK(IR), zeroing out below it */
2374 slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
2378 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir +
2381 /* Generate Q in U */
2382 /* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
2384 i__2 = *lwork - iwork + 1;
2385 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2386 work[iwork], &i__2, &ierr);
2392 /* Bidiagonalize R in WORK(IR) */
2393 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
2395 i__2 = *lwork - iwork + 1;
2396 sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
2397 work[itauq], &work[itaup], &work[iwork], &
2400 /* Generate left bidiagonalizing vectors in WORK(IR) */
2401 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
2403 i__2 = *lwork - iwork + 1;
2404 sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
2405 , &work[iwork], &i__2, &ierr);
2408 /* Perform bidiagonal QR iteration, computing left */
2409 /* singular vectors of R in WORK(IR) */
2410 /* (Workspace: need N*N+BDSPAC) */
2412 sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie],
2413 dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
2416 /* Multiply Q in U by left singular vectors of R in */
2417 /* WORK(IR), storing result in A */
2418 /* (Workspace: need N*N) */
2420 sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2421 work[ir], &ldwrkr, &c_b57, &a[a_offset], lda);
2423 /* Copy left singular vectors of A from A to U */
2425 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
2430 /* Insufficient workspace for a fast algorithm */
2435 /* Compute A=Q*R, copying result to U */
2436 /* (Workspace: need 2*N, prefer N+N*NB) */
2438 i__2 = *lwork - iwork + 1;
2439 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2440 iwork], &i__2, &ierr);
2441 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2444 /* Generate Q in U */
2445 /* (Workspace: need N+M, prefer N+M*NB) */
2447 i__2 = *lwork - iwork + 1;
2448 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2449 work[iwork], &i__2, &ierr);
2455 /* Zero out below R in A */
2460 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
2464 /* Bidiagonalize R in A */
2465 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2467 i__2 = *lwork - iwork + 1;
2468 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
2469 work[itauq], &work[itaup], &work[iwork], &
2472 /* Multiply Q in U by left bidiagonalizing vectors */
2474 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2476 i__2 = *lwork - iwork + 1;
2477 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2478 work[itauq], &u[u_offset], ldu, &work[iwork],
2483 /* Perform bidiagonal QR iteration, computing left */
2484 /* singular vectors of A in U */
2485 /* (Workspace: need BDSPAC) */
2487 sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie],
2488 dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
2495 /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
2496 /* M left singular vectors to be computed in U and */
2497 /* N right singular vectors to be overwritten on A */
2500 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2501 if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
2503 /* Sufficient workspace for a fast algorithm */
2506 if (*lwork >= wrkbl + (*lda << 1) * *n) {
2508 /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */
2511 ir = iu + ldwrku * *n;
2513 } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
2515 /* WORK(IU) is LDA by N and WORK(IR) is N by N */
2518 ir = iu + ldwrku * *n;
2522 /* WORK(IU) is N by N and WORK(IR) is N by N */
2525 ir = iu + ldwrku * *n;
2528 itau = ir + ldwrkr * *n;
2531 /* Compute A=Q*R, copying result to U */
2532 /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2534 i__2 = *lwork - iwork + 1;
2535 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2536 iwork], &i__2, &ierr);
2537 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2540 /* Generate Q in U */
2541 /* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */
2543 i__2 = *lwork - iwork + 1;
2544 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2545 work[iwork], &i__2, &ierr);
2547 /* Copy R to WORK(IU), zeroing out below it */
2549 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2553 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
2560 /* Bidiagonalize R in WORK(IU), copying result to */
2562 /* (Workspace: need 2*N*N+4*N, */
2563 /* prefer 2*N*N+3*N+2*N*NB) */
2565 i__2 = *lwork - iwork + 1;
2566 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2567 work[itauq], &work[itaup], &work[iwork], &
2569 slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
2572 /* Generate left bidiagonalizing vectors in WORK(IU) */
2573 /* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
2575 i__2 = *lwork - iwork + 1;
2576 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2577 , &work[iwork], &i__2, &ierr);
2579 /* Generate right bidiagonalizing vectors in WORK(IR) */
2580 /* (Workspace: need 2*N*N+4*N-1, */
2581 /* prefer 2*N*N+3*N+(N-1)*NB) */
2583 i__2 = *lwork - iwork + 1;
2584 sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
2585 , &work[iwork], &i__2, &ierr);
2588 /* Perform bidiagonal QR iteration, computing left */
2589 /* singular vectors of R in WORK(IU) and computing */
2590 /* right singular vectors of R in WORK(IR) */
2591 /* (Workspace: need 2*N*N+BDSPAC) */
2593 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
2594 ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1,
2595 &work[iwork], info);
2597 /* Multiply Q in U by left singular vectors of R in */
2598 /* WORK(IU), storing result in A */
2599 /* (Workspace: need N*N) */
2601 sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2602 work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
2604 /* Copy left singular vectors of A from A to U */
2606 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
2609 /* Copy right singular vectors of R from WORK(IR) to A */
2611 slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset],
2616 /* Insufficient workspace for a fast algorithm */
2621 /* Compute A=Q*R, copying result to U */
2622 /* (Workspace: need 2*N, prefer N+N*NB) */
2624 i__2 = *lwork - iwork + 1;
2625 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2626 iwork], &i__2, &ierr);
2627 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2630 /* Generate Q in U */
2631 /* (Workspace: need N+M, prefer N+M*NB) */
2633 i__2 = *lwork - iwork + 1;
2634 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2635 work[iwork], &i__2, &ierr);
2641 /* Zero out below R in A */
2646 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
2650 /* Bidiagonalize R in A */
2651 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2653 i__2 = *lwork - iwork + 1;
2654 sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
2655 work[itauq], &work[itaup], &work[iwork], &
2658 /* Multiply Q in U by left bidiagonalizing vectors */
2660 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2662 i__2 = *lwork - iwork + 1;
2663 sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2664 work[itauq], &u[u_offset], ldu, &work[iwork],
2668 /* Generate right bidiagonalizing vectors in A */
2669 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2671 i__2 = *lwork - iwork + 1;
2672 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
2673 &work[iwork], &i__2, &ierr);
2676 /* Perform bidiagonal QR iteration, computing left */
2677 /* singular vectors of A in U and computing right */
2678 /* singular vectors of A in A */
2679 /* (Workspace: need BDSPAC) */
2681 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
2682 a_offset], lda, &u[u_offset], ldu, dum, &c__1,
2683 &work[iwork], info);
2687 } else if (wntvas) {
2689 /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */
2691 /* M left singular vectors to be computed in U and */
2692 /* N right singular vectors to be computed in VT */
2695 i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2696 if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
2698 /* Sufficient workspace for a fast algorithm */
2701 if (*lwork >= wrkbl + *lda * *n) {
2703 /* WORK(IU) is LDA by N */
2708 /* WORK(IU) is N by N */
2712 itau = iu + ldwrku * *n;
2715 /* Compute A=Q*R, copying result to U */
2716 /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2718 i__2 = *lwork - iwork + 1;
2719 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2720 iwork], &i__2, &ierr);
2721 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2724 /* Generate Q in U */
2725 /* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
2727 i__2 = *lwork - iwork + 1;
2728 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2729 work[iwork], &i__2, &ierr);
2731 /* Copy R to WORK(IU), zeroing out below it */
2733 slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2737 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
2744 /* Bidiagonalize R in WORK(IU), copying result to VT */
2745 /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
2747 i__2 = *lwork - iwork + 1;
2748 sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2749 work[itauq], &work[itaup], &work[iwork], &
2751 slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
2754 /* Generate left bidiagonalizing vectors in WORK(IU) */
2755 /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
2757 i__2 = *lwork - iwork + 1;
2758 sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2759 , &work[iwork], &i__2, &ierr);
2761 /* Generate right bidiagonalizing vectors in VT */
2762 /* (Workspace: need N*N+4*N-1, */
2763 /* prefer N*N+3*N+(N-1)*NB) */
2765 i__2 = *lwork - iwork + 1;
2766 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2767 itaup], &work[iwork], &i__2, &ierr)
2771 /* Perform bidiagonal QR iteration, computing left */
2772 /* singular vectors of R in WORK(IU) and computing */
2773 /* right singular vectors of R in VT */
2774 /* (Workspace: need N*N+BDSPAC) */
2776 sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
2777 vt_offset], ldvt, &work[iu], &ldwrku, dum, &
2778 c__1, &work[iwork], info);
2780 /* Multiply Q in U by left singular vectors of R in */
2781 /* WORK(IU), storing result in A */
2782 /* (Workspace: need N*N) */
2784 sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2785 work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
2787 /* Copy left singular vectors of A from A to U */
2789 slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset],
2794 /* Insufficient workspace for a fast algorithm */
2799 /* Compute A=Q*R, copying result to U */
2800 /* (Workspace: need 2*N, prefer N+N*NB) */
2802 i__2 = *lwork - iwork + 1;
2803 sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2804 iwork], &i__2, &ierr);
2805 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset],
2808 /* Generate Q in U */
2809 /* (Workspace: need N+M, prefer N+M*NB) */
2811 i__2 = *lwork - iwork + 1;
2812 sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2813 work[iwork], &i__2, &ierr);
2815 /* Copy R from A to VT, zeroing out below it */
2817 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset],
2822 slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
2823 vt_dim1 + 2], ldvt);
2830 /* Bidiagonalize R in VT */
2831 /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2833 i__2 = *lwork - iwork + 1;
2834 sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie],
2835 &work[itauq], &work[itaup], &work[iwork], &
2838 /* Multiply Q in U by left bidiagonalizing vectors */
2840 /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2842 i__2 = *lwork - iwork + 1;
2843 sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt,
2844 &work[itauq], &u[u_offset], ldu, &work[iwork],
2847 /* Generate right bidiagonalizing vectors in VT */
2848 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2850 i__2 = *lwork - iwork + 1;
2851 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2852 itaup], &work[iwork], &i__2, &ierr)
2856 /* Perform bidiagonal QR iteration, computing left */
2857 /* singular vectors of A in U and computing right */
2858 /* singular vectors of A in VT */
2859 /* (Workspace: need BDSPAC) */
2861 sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
2862 vt_offset], ldvt, &u[u_offset], ldu, dum, &
2863 c__1, &work[iwork], info);
2875 /* Path 10 (M at least N, but not much larger) */
2876 /* Reduce to bidiagonal form without QR decomposition */
2883 /* Bidiagonalize A */
2884 /* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
2886 i__2 = *lwork - iwork + 1;
2887 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
2888 work[itaup], &work[iwork], &i__2, &ierr);
2891 /* If left singular vectors desired in U, copy result to U */
2892 /* and generate left bidiagonalizing vectors in U */
2893 /* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */
2895 slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
2902 i__2 = *lwork - iwork + 1;
2903 sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
2904 work[iwork], &i__2, &ierr);
2908 /* If right singular vectors desired in VT, copy result to */
2909 /* VT and generate right bidiagonalizing vectors in VT */
2910 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2912 slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
2913 i__2 = *lwork - iwork + 1;
2914 sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
2915 work[iwork], &i__2, &ierr);
2919 /* If left singular vectors desired in A, generate left */
2920 /* bidiagonalizing vectors in A */
2921 /* (Workspace: need 4*N, prefer 3*N+N*NB) */
2923 i__2 = *lwork - iwork + 1;
2924 sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
2925 iwork], &i__2, &ierr);
2929 /* If right singular vectors desired in A, generate right */
2930 /* bidiagonalizing vectors in A */
2931 /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2933 i__2 = *lwork - iwork + 1;
2934 sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
2935 iwork], &i__2, &ierr);
2938 if (wntuas || wntuo) {
2944 if (wntvas || wntvo) {
2950 if (! wntuo && ! wntvo) {
2952 /* Perform bidiagonal QR iteration, if desired, computing */
2953 /* left singular vectors in U and computing right singular */
2955 /* (Workspace: need BDSPAC) */
2957 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
2958 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
2960 } else if (! wntuo && wntvo) {
2962 /* Perform bidiagonal QR iteration, if desired, computing */
2963 /* left singular vectors in U and computing right singular */
2965 /* (Workspace: need BDSPAC) */
2967 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
2968 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
2972 /* Perform bidiagonal QR iteration, if desired, computing */
2973 /* left singular vectors in A and computing right singular */
2975 /* (Workspace: need BDSPAC) */
2977 sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
2978 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
2986 /* A has more columns than rows. If A has sufficiently more */
2987 /* columns than rows, first reduce using the LQ decomposition (if */
2988 /* sufficient workspace available) */
2994 /* Path 1t(N much larger than M, JOBVT='N') */
2995 /* No right singular vectors to be computed */
3001 /* (Workspace: need 2*M, prefer M+M*NB) */
3003 i__2 = *lwork - iwork + 1;
3004 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
3007 /* Zero out above L */
3011 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) +
3018 /* Bidiagonalize L in A */
3019 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3021 i__2 = *lwork - iwork + 1;
3022 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
3023 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3024 if (wntuo || wntuas) {
3026 /* If left singular vectors desired, generate Q */
3027 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
3029 i__2 = *lwork - iwork + 1;
3030 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
3031 work[iwork], &i__2, &ierr);
3035 if (wntuo || wntuas) {
3039 /* Perform bidiagonal QR iteration, computing left singular */
3040 /* vectors of A in A if desired */
3041 /* (Workspace: need BDSPAC) */
3043 sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
3044 c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
3047 /* If left singular vectors desired in U, copy them there */
3050 slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3053 } else if (wntvo && wntun) {
3055 /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
3056 /* M right singular vectors to be overwritten on A and */
3057 /* no left singular vectors to be computed */
3061 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3063 /* Sufficient workspace for a fast algorithm */
3067 i__2 = wrkbl, i__3 = *lda * *n + *m;
3068 if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
3070 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */
3075 } else /* if(complicated condition) */ {
3077 i__2 = wrkbl, i__3 = *lda * *n + *m;
3078 if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
3080 /* WORK(IU) is LDA by N and WORK(IR) is M by M */
3087 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */
3090 chunk = (*lwork - *m * *m - *m) / *m;
3094 itau = ir + ldwrkr * *m;
3098 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3100 i__2 = *lwork - iwork + 1;
3101 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3104 /* Copy L to WORK(IR) and zero out above it */
3106 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
3109 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir +
3112 /* Generate Q in A */
3113 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3115 i__2 = *lwork - iwork + 1;
3116 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3117 iwork], &i__2, &ierr);
3123 /* Bidiagonalize L in WORK(IR) */
3124 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3126 i__2 = *lwork - iwork + 1;
3127 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
3128 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3130 /* Generate right vectors bidiagonalizing L */
3131 /* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
3133 i__2 = *lwork - iwork + 1;
3134 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
3135 work[iwork], &i__2, &ierr);
3138 /* Perform bidiagonal QR iteration, computing right */
3139 /* singular vectors of L in WORK(IR) */
3140 /* (Workspace: need M*M+BDSPAC) */
3142 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
3143 ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
3147 /* Multiply right singular vectors of L in WORK(IR) by Q */
3148 /* in A, storing result in WORK(IU) and copying to A */
3149 /* (Workspace: need M*M+2*M, prefer M*M+M*N+M) */
3153 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
3156 i__4 = *n - i__ + 1;
3157 blk = f2cmin(i__4,chunk);
3158 sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
3159 ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
3161 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ *
3168 /* Insufficient workspace for a fast algorithm */
3175 /* Bidiagonalize A */
3176 /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
3178 i__3 = *lwork - iwork + 1;
3179 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
3180 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
3182 /* Generate right vectors bidiagonalizing A */
3183 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
3185 i__3 = *lwork - iwork + 1;
3186 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
3187 work[iwork], &i__3, &ierr);
3190 /* Perform bidiagonal QR iteration, computing right */
3191 /* singular vectors of A in A */
3192 /* (Workspace: need BDSPAC) */
3194 sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
3195 a_offset], lda, dum, &c__1, dum, &c__1, &work[
3200 } else if (wntvo && wntuas) {
3202 /* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */
3203 /* M right singular vectors to be overwritten on A and */
3204 /* M left singular vectors to be computed in U */
3208 if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
3210 /* Sufficient workspace for a fast algorithm */
3214 i__3 = wrkbl, i__2 = *lda * *n + *m;
3215 if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
3217 /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */
3222 } else /* if(complicated condition) */ {
3224 i__3 = wrkbl, i__2 = *lda * *n + *m;
3225 if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
3227 /* WORK(IU) is LDA by N and WORK(IR) is M by M */
3234 /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */
3237 chunk = (*lwork - *m * *m - *m) / *m;
3241 itau = ir + ldwrkr * *m;
3245 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3247 i__3 = *lwork - iwork + 1;
3248 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3251 /* Copy L to U, zeroing about above it */
3253 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3256 slaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 <<
3259 /* Generate Q in A */
3260 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3262 i__3 = *lwork - iwork + 1;
3263 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3264 iwork], &i__3, &ierr);
3270 /* Bidiagonalize L in U, copying result to WORK(IR) */
3271 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3273 i__3 = *lwork - iwork + 1;
3274 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
3275 itauq], &work[itaup], &work[iwork], &i__3, &ierr);
3276 slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
3278 /* Generate right vectors bidiagonalizing L in WORK(IR) */
3279 /* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
3281 i__3 = *lwork - iwork + 1;
3282 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
3283 work[iwork], &i__3, &ierr);
3285 /* Generate left vectors bidiagonalizing L in U */
3286 /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
3288 i__3 = *lwork - iwork + 1;
3289 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3290 work[iwork], &i__3, &ierr);
3293 /* Perform bidiagonal QR iteration, computing left */
3294 /* singular vectors of L in U, and computing right */
3295 /* singular vectors of L in WORK(IR) */
3296 /* (Workspace: need M*M+BDSPAC) */
3298 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir],
3299 &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
3303 /* Multiply right singular vectors of L in WORK(IR) by Q */
3304 /* in A, storing result in WORK(IU) and copying to A */
3305 /* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */
3309 for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
3312 i__4 = *n - i__ + 1;
3313 blk = f2cmin(i__4,chunk);
3314 sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
3315 ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
3317 slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ *
3324 /* Insufficient workspace for a fast algorithm */
3330 /* (Workspace: need 2*M, prefer M+M*NB) */
3332 i__2 = *lwork - iwork + 1;
3333 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3336 /* Copy L to U, zeroing out above it */
3338 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3341 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 <<
3344 /* Generate Q in A */
3345 /* (Workspace: need 2*M, prefer M+M*NB) */
3347 i__2 = *lwork - iwork + 1;
3348 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3349 iwork], &i__2, &ierr);
3355 /* Bidiagonalize L in U */
3356 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3358 i__2 = *lwork - iwork + 1;
3359 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
3360 itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3362 /* Multiply right vectors bidiagonalizing L by Q in A */
3363 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3365 i__2 = *lwork - iwork + 1;
3366 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
3367 itaup], &a[a_offset], lda, &work[iwork], &i__2, &
3370 /* Generate left vectors bidiagonalizing L in U */
3371 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
3373 i__2 = *lwork - iwork + 1;
3374 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3375 work[iwork], &i__2, &ierr);
3378 /* Perform bidiagonal QR iteration, computing left */
3379 /* singular vectors of A in U and computing right */
3380 /* singular vectors of A in A */
3381 /* (Workspace: need BDSPAC) */
3383 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
3384 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
3393 /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
3394 /* M right singular vectors to be computed in VT and */
3395 /* no left singular vectors to be computed */
3399 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3401 /* Sufficient workspace for a fast algorithm */
3404 if (*lwork >= wrkbl + *lda * *m) {
3406 /* WORK(IR) is LDA by M */
3411 /* WORK(IR) is M by M */
3415 itau = ir + ldwrkr * *m;
3419 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3421 i__2 = *lwork - iwork + 1;
3422 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3423 iwork], &i__2, &ierr);
3425 /* Copy L to WORK(IR), zeroing out above it */
3427 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
3431 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir +
3434 /* Generate Q in A */
3435 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3437 i__2 = *lwork - iwork + 1;
3438 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3439 work[iwork], &i__2, &ierr);
3445 /* Bidiagonalize L in WORK(IR) */
3446 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3448 i__2 = *lwork - iwork + 1;
3449 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
3450 work[itauq], &work[itaup], &work[iwork], &
3453 /* Generate right vectors bidiagonalizing L in */
3455 /* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
3457 i__2 = *lwork - iwork + 1;
3458 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
3459 , &work[iwork], &i__2, &ierr);
3462 /* Perform bidiagonal QR iteration, computing right */
3463 /* singular vectors of L in WORK(IR) */
3464 /* (Workspace: need M*M+BDSPAC) */
3466 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
3467 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
3470 /* Multiply right singular vectors of L in WORK(IR) by */
3471 /* Q in A, storing result in VT */
3472 /* (Workspace: need M*M) */
3474 sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr,
3475 &a[a_offset], lda, &c_b57, &vt[vt_offset],
3480 /* Insufficient workspace for a fast algorithm */
3486 /* (Workspace: need 2*M, prefer M+M*NB) */
3488 i__2 = *lwork - iwork + 1;
3489 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3490 iwork], &i__2, &ierr);
3492 /* Copy result to VT */
3494 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
3497 /* Generate Q in VT */
3498 /* (Workspace: need 2*M, prefer M+M*NB) */
3500 i__2 = *lwork - iwork + 1;
3501 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3502 work[iwork], &i__2, &ierr);
3508 /* Zero out above L in A */
3512 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1
3515 /* Bidiagonalize L in A */
3516 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3518 i__2 = *lwork - iwork + 1;
3519 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3520 work[itauq], &work[itaup], &work[iwork], &
3523 /* Multiply right vectors bidiagonalizing L by Q in VT */
3524 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3526 i__2 = *lwork - iwork + 1;
3527 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3528 work[itaup], &vt[vt_offset], ldvt, &work[
3529 iwork], &i__2, &ierr);
3532 /* Perform bidiagonal QR iteration, computing right */
3533 /* singular vectors of A in VT */
3534 /* (Workspace: need BDSPAC) */
3536 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
3537 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
3544 /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
3545 /* M right singular vectors to be computed in VT and */
3546 /* M left singular vectors to be overwritten on A */
3550 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
3552 /* Sufficient workspace for a fast algorithm */
3555 if (*lwork >= wrkbl + (*lda << 1) * *m) {
3557 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */
3560 ir = iu + ldwrku * *m;
3562 } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
3564 /* WORK(IU) is LDA by M and WORK(IR) is M by M */
3567 ir = iu + ldwrku * *m;
3571 /* WORK(IU) is M by M and WORK(IR) is M by M */
3574 ir = iu + ldwrku * *m;
3577 itau = ir + ldwrkr * *m;
3581 /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3583 i__2 = *lwork - iwork + 1;
3584 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3585 iwork], &i__2, &ierr);
3587 /* Copy L to WORK(IU), zeroing out below it */
3589 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3593 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
3596 /* Generate Q in A */
3597 /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3599 i__2 = *lwork - iwork + 1;
3600 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3601 work[iwork], &i__2, &ierr);
3607 /* Bidiagonalize L in WORK(IU), copying result to */
3609 /* (Workspace: need 2*M*M+4*M, */
3610 /* prefer 2*M*M+3*M+2*M*NB) */
3612 i__2 = *lwork - iwork + 1;
3613 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
3614 work[itauq], &work[itaup], &work[iwork], &
3616 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
3619 /* Generate right bidiagonalizing vectors in WORK(IU) */
3620 /* (Workspace: need 2*M*M+4*M-1, */
3621 /* prefer 2*M*M+3*M+(M-1)*NB) */
3623 i__2 = *lwork - iwork + 1;
3624 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3625 , &work[iwork], &i__2, &ierr);
3627 /* Generate left bidiagonalizing vectors in WORK(IR) */
3628 /* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
3630 i__2 = *lwork - iwork + 1;
3631 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
3632 , &work[iwork], &i__2, &ierr);
3635 /* Perform bidiagonal QR iteration, computing left */
3636 /* singular vectors of L in WORK(IR) and computing */
3637 /* right singular vectors of L in WORK(IU) */
3638 /* (Workspace: need 2*M*M+BDSPAC) */
3640 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
3641 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1,
3642 &work[iwork], info);
3644 /* Multiply right singular vectors of L in WORK(IU) by */
3645 /* Q in A, storing result in VT */
3646 /* (Workspace: need M*M) */
3648 sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku,
3649 &a[a_offset], lda, &c_b57, &vt[vt_offset],
3652 /* Copy left singular vectors of L to A */
3653 /* (Workspace: need M*M) */
3655 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset],
3660 /* Insufficient workspace for a fast algorithm */
3665 /* Compute A=L*Q, copying result to VT */
3666 /* (Workspace: need 2*M, prefer M+M*NB) */
3668 i__2 = *lwork - iwork + 1;
3669 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3670 iwork], &i__2, &ierr);
3671 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
3674 /* Generate Q in VT */
3675 /* (Workspace: need 2*M, prefer M+M*NB) */
3677 i__2 = *lwork - iwork + 1;
3678 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3679 work[iwork], &i__2, &ierr);
3685 /* Zero out above L in A */
3689 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1
3692 /* Bidiagonalize L in A */
3693 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3695 i__2 = *lwork - iwork + 1;
3696 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3697 work[itauq], &work[itaup], &work[iwork], &
3700 /* Multiply right vectors bidiagonalizing L by Q in VT */
3701 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3703 i__2 = *lwork - iwork + 1;
3704 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3705 work[itaup], &vt[vt_offset], ldvt, &work[
3706 iwork], &i__2, &ierr);
3708 /* Generate left bidiagonalizing vectors of L in A */
3709 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
3711 i__2 = *lwork - iwork + 1;
3712 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
3713 &work[iwork], &i__2, &ierr);
3716 /* Perform bidiagonal QR iteration, compute left */
3717 /* singular vectors of A in A and compute right */
3718 /* singular vectors of A in VT */
3719 /* (Workspace: need BDSPAC) */
3721 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
3722 vt_offset], ldvt, &a[a_offset], lda, dum, &
3723 c__1, &work[iwork], info);
3727 } else if (wntuas) {
3729 /* Path 6t(N much larger than M, JOBU='S' or 'A', */
3731 /* M right singular vectors to be computed in VT and */
3732 /* M left singular vectors to be computed in U */
3736 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3738 /* Sufficient workspace for a fast algorithm */
3741 if (*lwork >= wrkbl + *lda * *m) {
3743 /* WORK(IU) is LDA by N */
3748 /* WORK(IU) is LDA by M */
3752 itau = iu + ldwrku * *m;
3756 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3758 i__2 = *lwork - iwork + 1;
3759 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3760 iwork], &i__2, &ierr);
3762 /* Copy L to WORK(IU), zeroing out above it */
3764 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3768 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
3771 /* Generate Q in A */
3772 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3774 i__2 = *lwork - iwork + 1;
3775 sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3776 work[iwork], &i__2, &ierr);
3782 /* Bidiagonalize L in WORK(IU), copying result to U */
3783 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3785 i__2 = *lwork - iwork + 1;
3786 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
3787 work[itauq], &work[itaup], &work[iwork], &
3789 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset],
3792 /* Generate right bidiagonalizing vectors in WORK(IU) */
3793 /* (Workspace: need M*M+4*M-1, */
3794 /* prefer M*M+3*M+(M-1)*NB) */
3796 i__2 = *lwork - iwork + 1;
3797 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3798 , &work[iwork], &i__2, &ierr);
3800 /* Generate left bidiagonalizing vectors in U */
3801 /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
3803 i__2 = *lwork - iwork + 1;
3804 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3805 &work[iwork], &i__2, &ierr);
3808 /* Perform bidiagonal QR iteration, computing left */
3809 /* singular vectors of L in U and computing right */
3810 /* singular vectors of L in WORK(IU) */
3811 /* (Workspace: need M*M+BDSPAC) */
3813 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
3814 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
3817 /* Multiply right singular vectors of L in WORK(IU) by */
3818 /* Q in A, storing result in VT */
3819 /* (Workspace: need M*M) */
3821 sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku,
3822 &a[a_offset], lda, &c_b57, &vt[vt_offset],
3827 /* Insufficient workspace for a fast algorithm */
3832 /* Compute A=L*Q, copying result to VT */
3833 /* (Workspace: need 2*M, prefer M+M*NB) */
3835 i__2 = *lwork - iwork + 1;
3836 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3837 iwork], &i__2, &ierr);
3838 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
3841 /* Generate Q in VT */
3842 /* (Workspace: need 2*M, prefer M+M*NB) */
3844 i__2 = *lwork - iwork + 1;
3845 sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3846 work[iwork], &i__2, &ierr);
3848 /* Copy L to U, zeroing out above it */
3850 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset],
3854 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1
3861 /* Bidiagonalize L in U */
3862 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3864 i__2 = *lwork - iwork + 1;
3865 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
3866 work[itauq], &work[itaup], &work[iwork], &
3869 /* Multiply right bidiagonalizing vectors in U by Q */
3871 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3873 i__2 = *lwork - iwork + 1;
3874 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
3875 work[itaup], &vt[vt_offset], ldvt, &work[
3876 iwork], &i__2, &ierr);
3878 /* Generate left bidiagonalizing vectors in U */
3879 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
3881 i__2 = *lwork - iwork + 1;
3882 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3883 &work[iwork], &i__2, &ierr);
3886 /* Perform bidiagonal QR iteration, computing left */
3887 /* singular vectors of A in U and computing right */
3888 /* singular vectors of A in VT */
3889 /* (Workspace: need BDSPAC) */
3891 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
3892 vt_offset], ldvt, &u[u_offset], ldu, dum, &
3893 c__1, &work[iwork], info);
3903 /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
3904 /* N right singular vectors to be computed in VT and */
3905 /* no left singular vectors to be computed */
3908 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
3909 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3911 /* Sufficient workspace for a fast algorithm */
3914 if (*lwork >= wrkbl + *lda * *m) {
3916 /* WORK(IR) is LDA by M */
3921 /* WORK(IR) is M by M */
3925 itau = ir + ldwrkr * *m;
3928 /* Compute A=L*Q, copying result to VT */
3929 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3931 i__2 = *lwork - iwork + 1;
3932 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3933 iwork], &i__2, &ierr);
3934 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
3937 /* Copy L to WORK(IR), zeroing out above it */
3939 slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
3943 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir +
3946 /* Generate Q in VT */
3947 /* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
3949 i__2 = *lwork - iwork + 1;
3950 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
3951 work[iwork], &i__2, &ierr);
3957 /* Bidiagonalize L in WORK(IR) */
3958 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3960 i__2 = *lwork - iwork + 1;
3961 sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
3962 work[itauq], &work[itaup], &work[iwork], &
3965 /* Generate right bidiagonalizing vectors in WORK(IR) */
3966 /* (Workspace: need M*M+4*M-1, */
3967 /* prefer M*M+3*M+(M-1)*NB) */
3969 i__2 = *lwork - iwork + 1;
3970 sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
3971 , &work[iwork], &i__2, &ierr);
3974 /* Perform bidiagonal QR iteration, computing right */
3975 /* singular vectors of L in WORK(IR) */
3976 /* (Workspace: need M*M+BDSPAC) */
3978 sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
3979 work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
3982 /* Multiply right singular vectors of L in WORK(IR) by */
3983 /* Q in VT, storing result in A */
3984 /* (Workspace: need M*M) */
3986 sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr,
3987 &vt[vt_offset], ldvt, &c_b57, &a[a_offset],
3990 /* Copy right singular vectors of A from A to VT */
3992 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
3997 /* Insufficient workspace for a fast algorithm */
4002 /* Compute A=L*Q, copying result to VT */
4003 /* (Workspace: need 2*M, prefer M+M*NB) */
4005 i__2 = *lwork - iwork + 1;
4006 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4007 iwork], &i__2, &ierr);
4008 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
4011 /* Generate Q in VT */
4012 /* (Workspace: need M+N, prefer M+N*NB) */
4014 i__2 = *lwork - iwork + 1;
4015 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4016 work[iwork], &i__2, &ierr);
4022 /* Zero out above L in A */
4026 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1
4029 /* Bidiagonalize L in A */
4030 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
4032 i__2 = *lwork - iwork + 1;
4033 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
4034 work[itauq], &work[itaup], &work[iwork], &
4037 /* Multiply right bidiagonalizing vectors in A by Q */
4039 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
4041 i__2 = *lwork - iwork + 1;
4042 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
4043 work[itaup], &vt[vt_offset], ldvt, &work[
4044 iwork], &i__2, &ierr);
4047 /* Perform bidiagonal QR iteration, computing right */
4048 /* singular vectors of A in VT */
4049 /* (Workspace: need BDSPAC) */
4051 sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
4052 vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
4059 /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
4060 /* N right singular vectors to be computed in VT and */
4061 /* M left singular vectors to be overwritten on A */
4064 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
4065 if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
4067 /* Sufficient workspace for a fast algorithm */
4070 if (*lwork >= wrkbl + (*lda << 1) * *m) {
4072 /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */
4075 ir = iu + ldwrku * *m;
4077 } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
4079 /* WORK(IU) is LDA by M and WORK(IR) is M by M */
4082 ir = iu + ldwrku * *m;
4086 /* WORK(IU) is M by M and WORK(IR) is M by M */
4089 ir = iu + ldwrku * *m;
4092 itau = ir + ldwrkr * *m;
4095 /* Compute A=L*Q, copying result to VT */
4096 /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
4098 i__2 = *lwork - iwork + 1;
4099 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4100 iwork], &i__2, &ierr);
4101 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
4104 /* Generate Q in VT */
4105 /* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */
4107 i__2 = *lwork - iwork + 1;
4108 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4109 work[iwork], &i__2, &ierr);
4111 /* Copy L to WORK(IU), zeroing out above it */
4113 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
4117 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
4124 /* Bidiagonalize L in WORK(IU), copying result to */
4126 /* (Workspace: need 2*M*M+4*M, */
4127 /* prefer 2*M*M+3*M+2*M*NB) */
4129 i__2 = *lwork - iwork + 1;
4130 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
4131 work[itauq], &work[itaup], &work[iwork], &
4133 slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
4136 /* Generate right bidiagonalizing vectors in WORK(IU) */
4137 /* (Workspace: need 2*M*M+4*M-1, */
4138 /* prefer 2*M*M+3*M+(M-1)*NB) */
4140 i__2 = *lwork - iwork + 1;
4141 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
4142 , &work[iwork], &i__2, &ierr);
4144 /* Generate left bidiagonalizing vectors in WORK(IR) */
4145 /* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
4147 i__2 = *lwork - iwork + 1;
4148 sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
4149 , &work[iwork], &i__2, &ierr);
4152 /* Perform bidiagonal QR iteration, computing left */
4153 /* singular vectors of L in WORK(IR) and computing */
4154 /* right singular vectors of L in WORK(IU) */
4155 /* (Workspace: need 2*M*M+BDSPAC) */
4157 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
4158 iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1,
4159 &work[iwork], info);
4161 /* Multiply right singular vectors of L in WORK(IU) by */
4162 /* Q in VT, storing result in A */
4163 /* (Workspace: need M*M) */
4165 sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku,
4166 &vt[vt_offset], ldvt, &c_b57, &a[a_offset],
4169 /* Copy right singular vectors of A from A to VT */
4171 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
4174 /* Copy left singular vectors of A from WORK(IR) to A */
4176 slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset],
4181 /* Insufficient workspace for a fast algorithm */
4186 /* Compute A=L*Q, copying result to VT */
4187 /* (Workspace: need 2*M, prefer M+M*NB) */
4189 i__2 = *lwork - iwork + 1;
4190 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4191 iwork], &i__2, &ierr);
4192 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
4195 /* Generate Q in VT */
4196 /* (Workspace: need M+N, prefer M+N*NB) */
4198 i__2 = *lwork - iwork + 1;
4199 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4200 work[iwork], &i__2, &ierr);
4206 /* Zero out above L in A */
4210 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1
4213 /* Bidiagonalize L in A */
4214 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
4216 i__2 = *lwork - iwork + 1;
4217 sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
4218 work[itauq], &work[itaup], &work[iwork], &
4221 /* Multiply right bidiagonalizing vectors in A by Q */
4223 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
4225 i__2 = *lwork - iwork + 1;
4226 sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
4227 work[itaup], &vt[vt_offset], ldvt, &work[
4228 iwork], &i__2, &ierr);
4230 /* Generate left bidiagonalizing vectors in A */
4231 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
4233 i__2 = *lwork - iwork + 1;
4234 sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
4235 &work[iwork], &i__2, &ierr);
4238 /* Perform bidiagonal QR iteration, computing left */
4239 /* singular vectors of A in A and computing right */
4240 /* singular vectors of A in VT */
4241 /* (Workspace: need BDSPAC) */
4243 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
4244 vt_offset], ldvt, &a[a_offset], lda, dum, &
4245 c__1, &work[iwork], info);
4249 } else if (wntuas) {
4251 /* Path 9t(N much larger than M, JOBU='S' or 'A', */
4253 /* N right singular vectors to be computed in VT and */
4254 /* M left singular vectors to be computed in U */
4257 i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
4258 if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
4260 /* Sufficient workspace for a fast algorithm */
4263 if (*lwork >= wrkbl + *lda * *m) {
4265 /* WORK(IU) is LDA by M */
4270 /* WORK(IU) is M by M */
4274 itau = iu + ldwrku * *m;
4277 /* Compute A=L*Q, copying result to VT */
4278 /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
4280 i__2 = *lwork - iwork + 1;
4281 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4282 iwork], &i__2, &ierr);
4283 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
4286 /* Generate Q in VT */
4287 /* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
4289 i__2 = *lwork - iwork + 1;
4290 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4291 work[iwork], &i__2, &ierr);
4293 /* Copy L to WORK(IU), zeroing out above it */
4295 slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
4299 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu +
4306 /* Bidiagonalize L in WORK(IU), copying result to U */
4307 /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
4309 i__2 = *lwork - iwork + 1;
4310 sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
4311 work[itauq], &work[itaup], &work[iwork], &
4313 slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset],
4316 /* Generate right bidiagonalizing vectors in WORK(IU) */
4317 /* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
4319 i__2 = *lwork - iwork + 1;
4320 sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
4321 , &work[iwork], &i__2, &ierr);
4323 /* Generate left bidiagonalizing vectors in U */
4324 /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
4326 i__2 = *lwork - iwork + 1;
4327 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4328 &work[iwork], &i__2, &ierr);
4331 /* Perform bidiagonal QR iteration, computing left */
4332 /* singular vectors of L in U and computing right */
4333 /* singular vectors of L in WORK(IU) */
4334 /* (Workspace: need M*M+BDSPAC) */
4336 sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
4337 iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
4340 /* Multiply right singular vectors of L in WORK(IU) by */
4341 /* Q in VT, storing result in A */
4342 /* (Workspace: need M*M) */
4344 sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku,
4345 &vt[vt_offset], ldvt, &c_b57, &a[a_offset],
4348 /* Copy right singular vectors of A from A to VT */
4350 slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset],
4355 /* Insufficient workspace for a fast algorithm */
4360 /* Compute A=L*Q, copying result to VT */
4361 /* (Workspace: need 2*M, prefer M+M*NB) */
4363 i__2 = *lwork - iwork + 1;
4364 sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4365 iwork], &i__2, &ierr);
4366 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset],
4369 /* Generate Q in VT */
4370 /* (Workspace: need M+N, prefer M+N*NB) */
4372 i__2 = *lwork - iwork + 1;
4373 sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4374 work[iwork], &i__2, &ierr);
4376 /* Copy L to U, zeroing out above it */
4378 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset],
4382 slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1
4389 /* Bidiagonalize L in U */
4390 /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */
4392 i__2 = *lwork - iwork + 1;
4393 sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
4394 work[itauq], &work[itaup], &work[iwork], &
4397 /* Multiply right bidiagonalizing vectors in U by Q */
4399 /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */
4401 i__2 = *lwork - iwork + 1;
4402 sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
4403 work[itaup], &vt[vt_offset], ldvt, &work[
4404 iwork], &i__2, &ierr);
4406 /* Generate left bidiagonalizing vectors in U */
4407 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
4409 i__2 = *lwork - iwork + 1;
4410 sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4411 &work[iwork], &i__2, &ierr);
4414 /* Perform bidiagonal QR iteration, computing left */
4415 /* singular vectors of A in U and computing right */
4416 /* singular vectors of A in VT */
4417 /* (Workspace: need BDSPAC) */
4419 sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
4420 vt_offset], ldvt, &u[u_offset], ldu, dum, &
4421 c__1, &work[iwork], info);
4433 /* Path 10t(N greater than M, but not much larger) */
4434 /* Reduce to bidiagonal form without LQ decomposition */
4441 /* Bidiagonalize A */
4442 /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
4444 i__2 = *lwork - iwork + 1;
4445 sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
4446 work[itaup], &work[iwork], &i__2, &ierr);
4449 /* If left singular vectors desired in U, copy result to U */
4450 /* and generate left bidiagonalizing vectors in U */
4451 /* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
4453 slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
4454 i__2 = *lwork - iwork + 1;
4455 sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
4456 iwork], &i__2, &ierr);
4460 /* If right singular vectors desired in VT, copy result to */
4461 /* VT and generate right bidiagonalizing vectors in VT */
4462 /* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */
4464 slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
4471 i__2 = *lwork - iwork + 1;
4472 sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup],
4473 &work[iwork], &i__2, &ierr);
4477 /* If left singular vectors desired in A, generate left */
4478 /* bidiagonalizing vectors in A */
4479 /* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
4481 i__2 = *lwork - iwork + 1;
4482 sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
4483 iwork], &i__2, &ierr);
4487 /* If right singular vectors desired in A, generate right */
4488 /* bidiagonalizing vectors in A */
4489 /* (Workspace: need 4*M, prefer 3*M+M*NB) */
4491 i__2 = *lwork - iwork + 1;
4492 sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
4493 iwork], &i__2, &ierr);
4496 if (wntuas || wntuo) {
4502 if (wntvas || wntvo) {
4508 if (! wntuo && ! wntvo) {
4510 /* Perform bidiagonal QR iteration, if desired, computing */
4511 /* left singular vectors in U and computing right singular */
4513 /* (Workspace: need BDSPAC) */
4515 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
4516 vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
4518 } else if (! wntuo && wntvo) {
4520 /* Perform bidiagonal QR iteration, if desired, computing */
4521 /* left singular vectors in U and computing right singular */
4523 /* (Workspace: need BDSPAC) */
4525 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
4526 a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
4530 /* Perform bidiagonal QR iteration, if desired, computing */
4531 /* left singular vectors in A and computing right singular */
4533 /* (Workspace: need BDSPAC) */
4535 sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
4536 vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
4544 /* If SBDSQR failed to converge, copy unconverged superdiagonals */
4545 /* to WORK( 2:MINMN ) */
4550 for (i__ = 1; i__ <= i__2; ++i__) {
4551 work[i__ + 1] = work[i__ + ie - 1];
4556 for (i__ = minmn - 1; i__ >= 1; --i__) {
4557 work[i__ + 1] = work[i__ + ie - 1];
4563 /* Undo scaling if necessary */
4566 if (anrm > bignum) {
4567 slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
4570 if (*info != 0 && anrm > bignum) {
4572 slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
4575 if (anrm < smlnum) {
4576 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
4579 if (*info != 0 && anrm < smlnum) {
4581 slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
4586 /* Return optimal workspace in WORK(1) */
4588 work[1] = (real) maxwrk;