C_LAPACK: Fixes to make it compile with MSVC (#3605)
[platform/upstream/openblas.git] / lapack-netlib / SRC / cgesvd.c
1 #include <math.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <stdio.h>
5 #include <complex.h>
6 #ifdef complex
7 #undef complex
8 #endif
9 #ifdef I
10 #undef I
11 #endif
12
13 #if defined(_WIN64)
14 typedef long long BLASLONG;
15 typedef unsigned long long BLASULONG;
16 #else
17 typedef long BLASLONG;
18 typedef unsigned long BLASULONG;
19 #endif
20
21 #ifdef LAPACK_ILP64
22 typedef BLASLONG blasint;
23 #if defined(_WIN64)
24 #define blasabs(x) llabs(x)
25 #else
26 #define blasabs(x) labs(x)
27 #endif
28 #else
29 typedef int blasint;
30 #define blasabs(x) abs(x)
31 #endif
32
33 typedef blasint integer;
34
35 typedef unsigned int uinteger;
36 typedef char *address;
37 typedef short int shortint;
38 typedef float real;
39 typedef double doublereal;
40 typedef struct { real r, i; } complex;
41 typedef struct { doublereal r, i; } doublecomplex;
42 #ifdef _MSC_VER
43 static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
44 static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
45 static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
46 static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
47 #else
48 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
49 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
50 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
51 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
52 #endif
53 #define pCf(z) (*_pCf(z))
54 #define pCd(z) (*_pCd(z))
55 typedef int logical;
56 typedef short int shortlogical;
57 typedef char logical1;
58 typedef char integer1;
59
60 #define TRUE_ (1)
61 #define FALSE_ (0)
62
63 /* Extern is for use with -E */
64 #ifndef Extern
65 #define Extern extern
66 #endif
67
68 /* I/O stuff */
69
70 typedef int flag;
71 typedef int ftnlen;
72 typedef int ftnint;
73
74 /*external read, write*/
75 typedef struct
76 {       flag cierr;
77         ftnint ciunit;
78         flag ciend;
79         char *cifmt;
80         ftnint cirec;
81 } cilist;
82
83 /*internal read, write*/
84 typedef struct
85 {       flag icierr;
86         char *iciunit;
87         flag iciend;
88         char *icifmt;
89         ftnint icirlen;
90         ftnint icirnum;
91 } icilist;
92
93 /*open*/
94 typedef struct
95 {       flag oerr;
96         ftnint ounit;
97         char *ofnm;
98         ftnlen ofnmlen;
99         char *osta;
100         char *oacc;
101         char *ofm;
102         ftnint orl;
103         char *oblnk;
104 } olist;
105
106 /*close*/
107 typedef struct
108 {       flag cerr;
109         ftnint cunit;
110         char *csta;
111 } cllist;
112
113 /*rewind, backspace, endfile*/
114 typedef struct
115 {       flag aerr;
116         ftnint aunit;
117 } alist;
118
119 /* inquire */
120 typedef struct
121 {       flag inerr;
122         ftnint inunit;
123         char *infile;
124         ftnlen infilen;
125         ftnint  *inex;  /*parameters in standard's order*/
126         ftnint  *inopen;
127         ftnint  *innum;
128         ftnint  *innamed;
129         char    *inname;
130         ftnlen  innamlen;
131         char    *inacc;
132         ftnlen  inacclen;
133         char    *inseq;
134         ftnlen  inseqlen;
135         char    *indir;
136         ftnlen  indirlen;
137         char    *infmt;
138         ftnlen  infmtlen;
139         char    *inform;
140         ftnint  informlen;
141         char    *inunf;
142         ftnlen  inunflen;
143         ftnint  *inrecl;
144         ftnint  *innrec;
145         char    *inblank;
146         ftnlen  inblanklen;
147 } inlist;
148
149 #define VOID void
150
151 union Multitype {       /* for multiple entry points */
152         integer1 g;
153         shortint h;
154         integer i;
155         /* longint j; */
156         real r;
157         doublereal d;
158         complex c;
159         doublecomplex z;
160         };
161
162 typedef union Multitype Multitype;
163
164 struct Vardesc {        /* for Namelist */
165         char *name;
166         char *addr;
167         ftnlen *dims;
168         int  type;
169         };
170 typedef struct Vardesc Vardesc;
171
172 struct Namelist {
173         char *name;
174         Vardesc **vars;
175         int nvars;
176         };
177 typedef struct Namelist Namelist;
178
179 #define abs(x) ((x) >= 0 ? (x) : -(x))
180 #define dabs(x) (fabs(x))
181 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
182 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
183 #define dmin(a,b) (f2cmin(a,b))
184 #define dmax(a,b) (f2cmax(a,b))
185 #define bit_test(a,b)   ((a) >> (b) & 1)
186 #define bit_clear(a,b)  ((a) & ~((uinteger)1 << (b)))
187 #define bit_set(a,b)    ((a) |  ((uinteger)1 << (b)))
188
189 #define abort_() { sig_die("Fortran abort routine called", 1); }
190 #define c_abs(z) (cabsf(Cf(z)))
191 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
192 #ifdef _MSC_VER
193 #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
194 #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
195 #else
196 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
197 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
198 #endif
199 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
200 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
201 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
202 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
203 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
204 #define d_abs(x) (fabs(*(x)))
205 #define d_acos(x) (acos(*(x)))
206 #define d_asin(x) (asin(*(x)))
207 #define d_atan(x) (atan(*(x)))
208 #define d_atn2(x, y) (atan2(*(x),*(y)))
209 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
210 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
211 #define d_cos(x) (cos(*(x)))
212 #define d_cosh(x) (cosh(*(x)))
213 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
214 #define d_exp(x) (exp(*(x)))
215 #define d_imag(z) (cimag(Cd(z)))
216 #define r_imag(z) (cimagf(Cf(z)))
217 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
218 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
219 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
220 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
221 #define d_log(x) (log(*(x)))
222 #define d_mod(x, y) (fmod(*(x), *(y)))
223 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
224 #define d_nint(x) u_nint(*(x))
225 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
226 #define d_sign(a,b) u_sign(*(a),*(b))
227 #define r_sign(a,b) u_sign(*(a),*(b))
228 #define d_sin(x) (sin(*(x)))
229 #define d_sinh(x) (sinh(*(x)))
230 #define d_sqrt(x) (sqrt(*(x)))
231 #define d_tan(x) (tan(*(x)))
232 #define d_tanh(x) (tanh(*(x)))
233 #define i_abs(x) abs(*(x))
234 #define i_dnnt(x) ((integer)u_nint(*(x)))
235 #define i_len(s, n) (n)
236 #define i_nint(x) ((integer)u_nint(*(x)))
237 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
238 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
239 #define pow_si(B,E) spow_ui(*(B),*(E))
240 #define pow_ri(B,E) spow_ui(*(B),*(E))
241 #define pow_di(B,E) dpow_ui(*(B),*(E))
242 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
243 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
244 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
245 #define s_cat(lpp, rpp, rnp, np, llp) {         ftnlen i, nc, ll; char *f__rp, *lp;     ll = (llp); lp = (lpp);         for(i=0; i < (int)*(np); ++i) {                 nc = ll;                if((rnp)[i] < nc) nc = (rnp)[i];                ll -= nc;               f__rp = (rpp)[i];               while(--nc >= 0) *lp++ = *(f__rp)++;         }  while(--ll >= 0) *lp++ = ' '; }
246 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
247 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
248 #define sig_die(s, kill) { exit(1); }
249 #define s_stop(s, n) {exit(0);}
250 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
251 #define z_abs(z) (cabs(Cd(z)))
252 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
253 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
254 #define myexit_() break;
255 #define mycycle() continue;
256 #define myceiling(w) {ceil(w)}
257 #define myhuge(w) {HUGE_VAL}
258 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
259 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
260
261 /* procedure parameter types for -A and -C++ */
262
263 #define F2C_proc_par_types 1
264 #ifdef __cplusplus
265 typedef logical (*L_fp)(...);
266 #else
267 typedef logical (*L_fp)();
268 #endif
269
270 static float spow_ui(float x, integer n) {
271         float pow=1.0; unsigned long int u;
272         if(n != 0) {
273                 if(n < 0) n = -n, x = 1/x;
274                 for(u = n; ; ) {
275                         if(u & 01) pow *= x;
276                         if(u >>= 1) x *= x;
277                         else break;
278                 }
279         }
280         return pow;
281 }
282 static double dpow_ui(double x, integer n) {
283         double pow=1.0; unsigned long int u;
284         if(n != 0) {
285                 if(n < 0) n = -n, x = 1/x;
286                 for(u = n; ; ) {
287                         if(u & 01) pow *= x;
288                         if(u >>= 1) x *= x;
289                         else break;
290                 }
291         }
292         return pow;
293 }
294 #ifdef _MSC_VER
295 static _Fcomplex cpow_ui(complex x, integer n) {
296         complex pow={1.0,0.0}; unsigned long int u;
297                 if(n != 0) {
298                 if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
299                 for(u = n; ; ) {
300                         if(u & 01) pow.r *= x.r, pow.i *= x.i;
301                         if(u >>= 1) x.r *= x.r, x.i *= x.i;
302                         else break;
303                 }
304         }
305         _Fcomplex p={pow.r, pow.i};
306         return p;
307 }
308 #else
309 static _Complex float cpow_ui(_Complex float x, integer n) {
310         _Complex float pow=1.0; unsigned long int u;
311         if(n != 0) {
312                 if(n < 0) n = -n, x = 1/x;
313                 for(u = n; ; ) {
314                         if(u & 01) pow *= x;
315                         if(u >>= 1) x *= x;
316                         else break;
317                 }
318         }
319         return pow;
320 }
321 #endif
322 #ifdef _MSC_VER
323 static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
324         _Dcomplex pow={1.0,0.0}; unsigned long int u;
325         if(n != 0) {
326                 if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
327                 for(u = n; ; ) {
328                         if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
329                         if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
330                         else break;
331                 }
332         }
333         _Dcomplex p = {pow._Val[0], pow._Val[1]};
334         return p;
335 }
336 #else
337 static _Complex double zpow_ui(_Complex double x, integer n) {
338         _Complex double pow=1.0; unsigned long int u;
339         if(n != 0) {
340                 if(n < 0) n = -n, x = 1/x;
341                 for(u = n; ; ) {
342                         if(u & 01) pow *= x;
343                         if(u >>= 1) x *= x;
344                         else break;
345                 }
346         }
347         return pow;
348 }
349 #endif
350 static integer pow_ii(integer x, integer n) {
351         integer pow; unsigned long int u;
352         if (n <= 0) {
353                 if (n == 0 || x == 1) pow = 1;
354                 else if (x != -1) pow = x == 0 ? 1/x : 0;
355                 else n = -n;
356         }
357         if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
358                 u = n;
359                 for(pow = 1; ; ) {
360                         if(u & 01) pow *= x;
361                         if(u >>= 1) x *= x;
362                         else break;
363                 }
364         }
365         return pow;
366 }
367 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
368 {
369         double m; integer i, mi;
370         for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
371                 if (w[i-1]>m) mi=i ,m=w[i-1];
372         return mi-s+1;
373 }
374 static integer smaxloc_(float *w, integer s, integer e, integer *n)
375 {
376         float m; integer i, mi;
377         for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
378                 if (w[i-1]>m) mi=i ,m=w[i-1];
379         return mi-s+1;
380 }
381 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
382         integer n = *n_, incx = *incx_, incy = *incy_, i;
383 #ifdef _MSC_VER
384         _Fcomplex zdotc = {0.0, 0.0};
385         if (incx == 1 && incy == 1) {
386                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
387                         zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
388                         zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
389                 }
390         } else {
391                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
392                         zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
393                         zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
394                 }
395         }
396         pCf(z) = zdotc;
397 }
398 #else
399         _Complex float zdotc = 0.0;
400         if (incx == 1 && incy == 1) {
401                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
402                         zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
403                 }
404         } else {
405                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
406                         zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
407                 }
408         }
409         pCf(z) = zdotc;
410 }
411 #endif
412 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
413         integer n = *n_, incx = *incx_, incy = *incy_, i;
414 #ifdef _MSC_VER
415         _Dcomplex zdotc = {0.0, 0.0};
416         if (incx == 1 && incy == 1) {
417                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
418                         zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
419                         zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
420                 }
421         } else {
422                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
423                         zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
424                         zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
425                 }
426         }
427         pCd(z) = zdotc;
428 }
429 #else
430         _Complex double zdotc = 0.0;
431         if (incx == 1 && incy == 1) {
432                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
433                         zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
434                 }
435         } else {
436                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
437                         zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
438                 }
439         }
440         pCd(z) = zdotc;
441 }
442 #endif  
443 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
444         integer n = *n_, incx = *incx_, incy = *incy_, i;
445 #ifdef _MSC_VER
446         _Fcomplex zdotc = {0.0, 0.0};
447         if (incx == 1 && incy == 1) {
448                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
449                         zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
450                         zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
451                 }
452         } else {
453                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
454                         zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
455                         zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
456                 }
457         }
458         pCf(z) = zdotc;
459 }
460 #else
461         _Complex float zdotc = 0.0;
462         if (incx == 1 && incy == 1) {
463                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
464                         zdotc += Cf(&x[i]) * Cf(&y[i]);
465                 }
466         } else {
467                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
468                         zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
469                 }
470         }
471         pCf(z) = zdotc;
472 }
473 #endif
474 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
475         integer n = *n_, incx = *incx_, incy = *incy_, i;
476 #ifdef _MSC_VER
477         _Dcomplex zdotc = {0.0, 0.0};
478         if (incx == 1 && incy == 1) {
479                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
480                         zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
481                         zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
482                 }
483         } else {
484                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
485                         zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
486                         zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
487                 }
488         }
489         pCd(z) = zdotc;
490 }
491 #else
492         _Complex double zdotc = 0.0;
493         if (incx == 1 && incy == 1) {
494                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
495                         zdotc += Cd(&x[i]) * Cd(&y[i]);
496                 }
497         } else {
498                 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
499                         zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
500                 }
501         }
502         pCd(z) = zdotc;
503 }
504 #endif
505 /*  -- translated by f2c (version 20000121).
506    You must link the resulting object file with the libraries:
507         -lf2c -lm   (in that order)
508 */
509
510
511
512
513 /* Table of constant values */
514
515 static complex c_b1 = {0.f,0.f};
516 static complex c_b2 = {1.f,0.f};
517 static integer c__6 = 6;
518 static integer c__0 = 0;
519 static integer c__2 = 2;
520 static integer c_n1 = -1;
521 static integer c__1 = 1;
522
523 /* > \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b> */
524
525 /*  =========== DOCUMENTATION =========== */
526
527 /* Online html documentation available at */
528 /*            http://www.netlib.org/lapack/explore-html/ */
529
530 /* > \htmlonly */
531 /* > Download CGESVD + dependencies */
532 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.
533 f"> */
534 /* > [TGZ]</a> */
535 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.
536 f"> */
537 /* > [ZIP]</a> */
538 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.
539 f"> */
540 /* > [TXT]</a> */
541 /* > \endhtmlonly */
542
543 /*  Definition: */
544 /*  =========== */
545
546 /*       SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */
547 /*                          WORK, LWORK, RWORK, INFO ) */
548
549 /*       CHARACTER          JOBU, JOBVT */
550 /*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
551 /*       REAL               RWORK( * ), S( * ) */
552 /*       COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ), */
553 /*      $                   WORK( * ) */
554
555
556 /* > \par Purpose: */
557 /*  ============= */
558 /* > */
559 /* > \verbatim */
560 /* > */
561 /* > CGESVD computes the singular value decomposition (SVD) of a complex */
562 /* > M-by-N matrix A, optionally computing the left and/or right singular */
563 /* > vectors. The SVD is written */
564 /* > */
565 /* >      A = U * SIGMA * conjugate-transpose(V) */
566 /* > */
567 /* > where SIGMA is an M-by-N matrix which is zero except for its */
568 /* > f2cmin(m,n) diagonal elements, U is an M-by-M unitary matrix, and */
569 /* > V is an N-by-N unitary matrix.  The diagonal elements of SIGMA */
570 /* > are the singular values of A; they are real and non-negative, and */
571 /* > are returned in descending order.  The first f2cmin(m,n) columns of */
572 /* > U and V are the left and right singular vectors of A. */
573 /* > */
574 /* > Note that the routine returns V**H, not V. */
575 /* > \endverbatim */
576
577 /*  Arguments: */
578 /*  ========== */
579
580 /* > \param[in] JOBU */
581 /* > \verbatim */
582 /* >          JOBU is CHARACTER*1 */
583 /* >          Specifies options for computing all or part of the matrix U: */
584 /* >          = 'A':  all M columns of U are returned in array U: */
585 /* >          = 'S':  the first f2cmin(m,n) columns of U (the left singular */
586 /* >                  vectors) are returned in the array U; */
587 /* >          = 'O':  the first f2cmin(m,n) columns of U (the left singular */
588 /* >                  vectors) are overwritten on the array A; */
589 /* >          = 'N':  no columns of U (no left singular vectors) are */
590 /* >                  computed. */
591 /* > \endverbatim */
592 /* > */
593 /* > \param[in] JOBVT */
594 /* > \verbatim */
595 /* >          JOBVT is CHARACTER*1 */
596 /* >          Specifies options for computing all or part of the matrix */
597 /* >          V**H: */
598 /* >          = 'A':  all N rows of V**H are returned in the array VT; */
599 /* >          = 'S':  the first f2cmin(m,n) rows of V**H (the right singular */
600 /* >                  vectors) are returned in the array VT; */
601 /* >          = 'O':  the first f2cmin(m,n) rows of V**H (the right singular */
602 /* >                  vectors) are overwritten on the array A; */
603 /* >          = 'N':  no rows of V**H (no right singular vectors) are */
604 /* >                  computed. */
605 /* > */
606 /* >          JOBVT and JOBU cannot both be 'O'. */
607 /* > \endverbatim */
608 /* > */
609 /* > \param[in] M */
610 /* > \verbatim */
611 /* >          M is INTEGER */
612 /* >          The number of rows of the input matrix A.  M >= 0. */
613 /* > \endverbatim */
614 /* > */
615 /* > \param[in] N */
616 /* > \verbatim */
617 /* >          N is INTEGER */
618 /* >          The number of columns of the input matrix A.  N >= 0. */
619 /* > \endverbatim */
620 /* > */
621 /* > \param[in,out] A */
622 /* > \verbatim */
623 /* >          A is COMPLEX array, dimension (LDA,N) */
624 /* >          On entry, the M-by-N matrix A. */
625 /* >          On exit, */
626 /* >          if JOBU = 'O',  A is overwritten with the first f2cmin(m,n) */
627 /* >                          columns of U (the left singular vectors, */
628 /* >                          stored columnwise); */
629 /* >          if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */
630 /* >                          rows of V**H (the right singular vectors, */
631 /* >                          stored rowwise); */
632 /* >          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */
633 /* >                          are destroyed. */
634 /* > \endverbatim */
635 /* > */
636 /* > \param[in] LDA */
637 /* > \verbatim */
638 /* >          LDA is INTEGER */
639 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
640 /* > \endverbatim */
641 /* > */
642 /* > \param[out] S */
643 /* > \verbatim */
644 /* >          S is REAL array, dimension (f2cmin(M,N)) */
645 /* >          The singular values of A, sorted so that S(i) >= S(i+1). */
646 /* > \endverbatim */
647 /* > */
648 /* > \param[out] U */
649 /* > \verbatim */
650 /* >          U is COMPLEX array, dimension (LDU,UCOL) */
651 /* >          (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */
652 /* >          If JOBU = 'A', U contains the M-by-M unitary matrix U; */
653 /* >          if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */
654 /* >          (the left singular vectors, stored columnwise); */
655 /* >          if JOBU = 'N' or 'O', U is not referenced. */
656 /* > \endverbatim */
657 /* > */
658 /* > \param[in] LDU */
659 /* > \verbatim */
660 /* >          LDU is INTEGER */
661 /* >          The leading dimension of the array U.  LDU >= 1; if */
662 /* >          JOBU = 'S' or 'A', LDU >= M. */
663 /* > \endverbatim */
664 /* > */
665 /* > \param[out] VT */
666 /* > \verbatim */
667 /* >          VT is COMPLEX array, dimension (LDVT,N) */
668 /* >          If JOBVT = 'A', VT contains the N-by-N unitary matrix */
669 /* >          V**H; */
670 /* >          if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */
671 /* >          V**H (the right singular vectors, stored rowwise); */
672 /* >          if JOBVT = 'N' or 'O', VT is not referenced. */
673 /* > \endverbatim */
674 /* > */
675 /* > \param[in] LDVT */
676 /* > \verbatim */
677 /* >          LDVT is INTEGER */
678 /* >          The leading dimension of the array VT.  LDVT >= 1; if */
679 /* >          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */
680 /* > \endverbatim */
681 /* > */
682 /* > \param[out] WORK */
683 /* > \verbatim */
684 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
685 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
686 /* > \endverbatim */
687 /* > */
688 /* > \param[in] LWORK */
689 /* > \verbatim */
690 /* >          LWORK is INTEGER */
691 /* >          The dimension of the array WORK. */
692 /* >          LWORK >=  MAX(1,2*MIN(M,N)+MAX(M,N)). */
693 /* >          For good performance, LWORK should generally be larger. */
694 /* > */
695 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
696 /* >          only calculates the optimal size of the WORK array, returns */
697 /* >          this value as the first entry of the WORK array, and no error */
698 /* >          message related to LWORK is issued by XERBLA. */
699 /* > \endverbatim */
700 /* > */
701 /* > \param[out] RWORK */
702 /* > \verbatim */
703 /* >          RWORK is REAL array, dimension (5*f2cmin(M,N)) */
704 /* >          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the */
705 /* >          unconverged superdiagonal elements of an upper bidiagonal */
706 /* >          matrix B whose diagonal is in S (not necessarily sorted). */
707 /* >          B satisfies A = U * B * VT, so it has the same singular */
708 /* >          values as A, and singular vectors related by U and VT. */
709 /* > \endverbatim */
710 /* > */
711 /* > \param[out] INFO */
712 /* > \verbatim */
713 /* >          INFO is INTEGER */
714 /* >          = 0:  successful exit. */
715 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
716 /* >          > 0:  if CBDSQR did not converge, INFO specifies how many */
717 /* >                superdiagonals of an intermediate bidiagonal form B */
718 /* >                did not converge to zero. See the description of RWORK */
719 /* >                above for details. */
720 /* > \endverbatim */
721
722 /*  Authors: */
723 /*  ======== */
724
725 /* > \author Univ. of Tennessee */
726 /* > \author Univ. of California Berkeley */
727 /* > \author Univ. of Colorado Denver */
728 /* > \author NAG Ltd. */
729
730 /* > \date April 2012 */
731
732 /* > \ingroup complexGEsing */
733
734 /*  ===================================================================== */
735 /* Subroutine */ int cgesvd_(char *jobu, char *jobvt, integer *m, integer *n, 
736         complex *a, integer *lda, real *s, complex *u, integer *ldu, complex *
737         vt, integer *ldvt, complex *work, integer *lwork, real *rwork, 
738         integer *info)
739 {
740     /* System generated locals */
741     address a__1[2];
742     integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
743             i__2, i__3, i__4;
744     char ch__1[2];
745
746     /* Local variables */
747     complex cdum[1];
748     integer iscl;
749     real anrm;
750     integer ierr, itau, ncvt, nrvt, lwork_cgebrd__, lwork_cgelqf__, 
751             lwork_cgeqrf__, i__;
752     extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
753             integer *, complex *, complex *, integer *, complex *, integer *, 
754             complex *, complex *, integer *);
755     extern logical lsame_(char *, char *);
756     integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork;
757     logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
758     integer ie;
759     extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, 
760             integer *, real *, real *, complex *, complex *, complex *, 
761             integer *, integer *);
762     extern real clange_(char *, integer *, integer *, complex *, integer *, 
763             real *);
764     integer ir, iu;
765     extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, 
766             integer *, complex *, complex *, integer *, integer *), clascl_(
767             char *, integer *, integer *, real *, real *, integer *, integer *
768             , complex *, integer *, integer *), cgeqrf_(integer *, 
769             integer *, complex *, integer *, complex *, complex *, integer *, 
770             integer *);
771     extern real slamch_(char *);
772     extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
773             *, integer *, complex *, integer *), claset_(char *, 
774             integer *, integer *, complex *, complex *, complex *, integer *), cbdsqr_(char *, integer *, integer *, integer *, integer 
775             *, real *, real *, complex *, integer *, complex *, integer *, 
776             complex *, integer *, real *, integer *), xerbla_(char *, 
777             integer *, ftnlen), cungbr_(char *, integer *, integer *, integer 
778             *, complex *, integer *, complex *, complex *, integer *, integer 
779             *);
780     real bignum;
781     extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
782             real *, integer *, integer *, real *, integer *, integer *);
783     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
784             integer *, integer *, ftnlen, ftnlen);
785     extern /* Subroutine */ int cunmbr_(char *, char *, char *, integer *, 
786             integer *, integer *, complex *, integer *, complex *, complex *, 
787             integer *, complex *, integer *, integer *), cunglq_(integer *, integer *, integer *, complex *, 
788             integer *, complex *, complex *, integer *, integer *), cungqr_(
789             integer *, integer *, integer *, complex *, integer *, complex *, 
790             complex *, integer *, integer *);
791     integer ldwrkr, minwrk, ldwrku, maxwrk;
792     real smlnum;
793     integer irwork;
794     logical lquery, wntuas, wntvas;
795     integer lwork_cungbr_p__, lwork_cungbr_q__, lwork_cunglq_n__, 
796             lwork_cunglq_m__, lwork_cungqr_m__, lwork_cungqr_n__, blk, ncu;
797     real dum[1], eps;
798     integer nru;
799
800
801 /*  -- LAPACK driver routine (version 3.7.0) -- */
802 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
803 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
804 /*     April 2012 */
805
806
807 /*  ===================================================================== */
808
809
810 /*     Test the input arguments */
811
812     /* Parameter adjustments */
813     a_dim1 = *lda;
814     a_offset = 1 + a_dim1 * 1;
815     a -= a_offset;
816     --s;
817     u_dim1 = *ldu;
818     u_offset = 1 + u_dim1 * 1;
819     u -= u_offset;
820     vt_dim1 = *ldvt;
821     vt_offset = 1 + vt_dim1 * 1;
822     vt -= vt_offset;
823     --work;
824     --rwork;
825
826     /* Function Body */
827     *info = 0;
828     minmn = f2cmin(*m,*n);
829     wntua = lsame_(jobu, "A");
830     wntus = lsame_(jobu, "S");
831     wntuas = wntua || wntus;
832     wntuo = lsame_(jobu, "O");
833     wntun = lsame_(jobu, "N");
834     wntva = lsame_(jobvt, "A");
835     wntvs = lsame_(jobvt, "S");
836     wntvas = wntva || wntvs;
837     wntvo = lsame_(jobvt, "O");
838     wntvn = lsame_(jobvt, "N");
839     lquery = *lwork == -1;
840
841     if (! (wntua || wntus || wntuo || wntun)) {
842         *info = -1;
843     } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
844         *info = -2;
845     } else if (*m < 0) {
846         *info = -3;
847     } else if (*n < 0) {
848         *info = -4;
849     } else if (*lda < f2cmax(1,*m)) {
850         *info = -6;
851     } else if (*ldu < 1 || wntuas && *ldu < *m) {
852         *info = -9;
853     } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
854         *info = -11;
855     }
856
857 /*     Compute workspace */
858 /*      (Note: Comments in the code beginning "Workspace:" describe the */
859 /*       minimal amount of workspace needed at that point in the code, */
860 /*       as well as the preferred amount for good performance. */
861 /*       CWorkspace refers to complex workspace, and RWorkspace to */
862 /*       real workspace. NB refers to the optimal block size for the */
863 /*       immediately following subroutine, as returned by ILAENV.) */
864
865     if (*info == 0) {
866         minwrk = 1;
867         maxwrk = 1;
868         if (*m >= *n && minmn > 0) {
869
870 /*           Space needed for ZBDSQR is BDSPAC = 5*N */
871
872 /* Writing concatenation */
873             i__1[0] = 1, a__1[0] = jobu;
874             i__1[1] = 1, a__1[1] = jobvt;
875             s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
876             mnthr = ilaenv_(&c__6, "CGESVD", ch__1, m, n, &c__0, &c__0, (
877                     ftnlen)6, (ftnlen)2);
878 /*           Compute space needed for CGEQRF */
879             cgeqrf_(m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
880             lwork_cgeqrf__ = (integer) cdum[0].r;
881 /*           Compute space needed for CUNGQR */
882             cungqr_(m, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
883             lwork_cungqr_n__ = (integer) cdum[0].r;
884             cungqr_(m, m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
885             lwork_cungqr_m__ = (integer) cdum[0].r;
886 /*           Compute space needed for CGEBRD */
887             cgebrd_(n, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, &
888                     c_n1, &ierr);
889             lwork_cgebrd__ = (integer) cdum[0].r;
890 /*           Compute space needed for CUNGBR */
891             cungbr_("P", n, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
892             lwork_cungbr_p__ = (integer) cdum[0].r;
893             cungbr_("Q", n, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
894             lwork_cungbr_q__ = (integer) cdum[0].r;
895
896 /* Writing concatenation */
897             i__1[0] = 1, a__1[0] = jobu;
898             i__1[1] = 1, a__1[1] = jobvt;
899             s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
900             mnthr = ilaenv_(&c__6, "CGESVD", ch__1, m, n, &c__0, &c__0, (
901                     ftnlen)6, (ftnlen)2);
902             if (*m >= mnthr) {
903                 if (wntun) {
904
905 /*                 Path 1 (M much larger than N, JOBU='N') */
906
907                     maxwrk = *n + lwork_cgeqrf__;
908 /* Computing MAX */
909                     i__2 = maxwrk, i__3 = (*n << 1) + lwork_cgebrd__;
910                     maxwrk = f2cmax(i__2,i__3);
911                     if (wntvo || wntvas) {
912 /* Computing MAX */
913                         i__2 = maxwrk, i__3 = (*n << 1) + lwork_cungbr_p__;
914                         maxwrk = f2cmax(i__2,i__3);
915                     }
916                     minwrk = *n * 3;
917                 } else if (wntuo && wntvn) {
918
919 /*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
920
921                     wrkbl = *n + lwork_cgeqrf__;
922 /* Computing MAX */
923                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_n__;
924                     wrkbl = f2cmax(i__2,i__3);
925 /* Computing MAX */
926                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
927                     wrkbl = f2cmax(i__2,i__3);
928 /* Computing MAX */
929                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
930                     wrkbl = f2cmax(i__2,i__3);
931 /* Computing MAX */
932                     i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n;
933                     maxwrk = f2cmax(i__2,i__3);
934                     minwrk = (*n << 1) + *m;
935                 } else if (wntuo && wntvas) {
936
937 /*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */
938 /*                 'A') */
939
940                     wrkbl = *n + lwork_cgeqrf__;
941 /* Computing MAX */
942                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_n__;
943                     wrkbl = f2cmax(i__2,i__3);
944 /* Computing MAX */
945                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
946                     wrkbl = f2cmax(i__2,i__3);
947 /* Computing MAX */
948                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
949                     wrkbl = f2cmax(i__2,i__3);
950 /* Computing MAX */
951                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_p__;
952                     wrkbl = f2cmax(i__2,i__3);
953 /* Computing MAX */
954                     i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n;
955                     maxwrk = f2cmax(i__2,i__3);
956                     minwrk = (*n << 1) + *m;
957                 } else if (wntus && wntvn) {
958
959 /*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
960
961                     wrkbl = *n + lwork_cgeqrf__;
962 /* Computing MAX */
963                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_n__;
964                     wrkbl = f2cmax(i__2,i__3);
965 /* Computing MAX */
966                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
967                     wrkbl = f2cmax(i__2,i__3);
968 /* Computing MAX */
969                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
970                     wrkbl = f2cmax(i__2,i__3);
971                     maxwrk = *n * *n + wrkbl;
972                     minwrk = (*n << 1) + *m;
973                 } else if (wntus && wntvo) {
974
975 /*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
976
977                     wrkbl = *n + lwork_cgeqrf__;
978 /* Computing MAX */
979                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_n__;
980                     wrkbl = f2cmax(i__2,i__3);
981 /* Computing MAX */
982                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
983                     wrkbl = f2cmax(i__2,i__3);
984 /* Computing MAX */
985                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
986                     wrkbl = f2cmax(i__2,i__3);
987 /* Computing MAX */
988                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_p__;
989                     wrkbl = f2cmax(i__2,i__3);
990                     maxwrk = (*n << 1) * *n + wrkbl;
991                     minwrk = (*n << 1) + *m;
992                 } else if (wntus && wntvas) {
993
994 /*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
995 /*                 'A') */
996
997                     wrkbl = *n + lwork_cgeqrf__;
998 /* Computing MAX */
999                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_n__;
1000                     wrkbl = f2cmax(i__2,i__3);
1001 /* Computing MAX */
1002                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
1003                     wrkbl = f2cmax(i__2,i__3);
1004 /* Computing MAX */
1005                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
1006                     wrkbl = f2cmax(i__2,i__3);
1007 /* Computing MAX */
1008                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_p__;
1009                     wrkbl = f2cmax(i__2,i__3);
1010                     maxwrk = *n * *n + wrkbl;
1011                     minwrk = (*n << 1) + *m;
1012                 } else if (wntua && wntvn) {
1013
1014 /*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
1015
1016                     wrkbl = *n + lwork_cgeqrf__;
1017 /* Computing MAX */
1018                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_m__;
1019                     wrkbl = f2cmax(i__2,i__3);
1020 /* Computing MAX */
1021                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
1022                     wrkbl = f2cmax(i__2,i__3);
1023 /* Computing MAX */
1024                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
1025                     wrkbl = f2cmax(i__2,i__3);
1026                     maxwrk = *n * *n + wrkbl;
1027                     minwrk = (*n << 1) + *m;
1028                 } else if (wntua && wntvo) {
1029
1030 /*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
1031
1032                     wrkbl = *n + lwork_cgeqrf__;
1033 /* Computing MAX */
1034                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_m__;
1035                     wrkbl = f2cmax(i__2,i__3);
1036 /* Computing MAX */
1037                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
1038                     wrkbl = f2cmax(i__2,i__3);
1039 /* Computing MAX */
1040                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
1041                     wrkbl = f2cmax(i__2,i__3);
1042 /* Computing MAX */
1043                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_p__;
1044                     wrkbl = f2cmax(i__2,i__3);
1045                     maxwrk = (*n << 1) * *n + wrkbl;
1046                     minwrk = (*n << 1) + *m;
1047                 } else if (wntua && wntvas) {
1048
1049 /*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */
1050 /*                 'A') */
1051
1052                     wrkbl = *n + lwork_cgeqrf__;
1053 /* Computing MAX */
1054                     i__2 = wrkbl, i__3 = *n + lwork_cungqr_m__;
1055                     wrkbl = f2cmax(i__2,i__3);
1056 /* Computing MAX */
1057                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cgebrd__;
1058                     wrkbl = f2cmax(i__2,i__3);
1059 /* Computing MAX */
1060                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_q__;
1061                     wrkbl = f2cmax(i__2,i__3);
1062 /* Computing MAX */
1063                     i__2 = wrkbl, i__3 = (*n << 1) + lwork_cungbr_p__;
1064                     wrkbl = f2cmax(i__2,i__3);
1065                     maxwrk = *n * *n + wrkbl;
1066                     minwrk = (*n << 1) + *m;
1067                 }
1068             } else {
1069
1070 /*              Path 10 (M at least N, but not much larger) */
1071
1072                 cgebrd_(m, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum,
1073                          &c_n1, &ierr);
1074                 lwork_cgebrd__ = (integer) cdum[0].r;
1075                 maxwrk = (*n << 1) + lwork_cgebrd__;
1076                 if (wntus || wntuo) {
1077                     cungbr_("Q", m, n, n, &a[a_offset], lda, cdum, cdum, &
1078                             c_n1, &ierr);
1079                     lwork_cungbr_q__ = (integer) cdum[0].r;
1080 /* Computing MAX */
1081                     i__2 = maxwrk, i__3 = (*n << 1) + lwork_cungbr_q__;
1082                     maxwrk = f2cmax(i__2,i__3);
1083                 }
1084                 if (wntua) {
1085                     cungbr_("Q", m, m, n, &a[a_offset], lda, cdum, cdum, &
1086                             c_n1, &ierr);
1087                     lwork_cungbr_q__ = (integer) cdum[0].r;
1088 /* Computing MAX */
1089                     i__2 = maxwrk, i__3 = (*n << 1) + lwork_cungbr_q__;
1090                     maxwrk = f2cmax(i__2,i__3);
1091                 }
1092                 if (! wntvn) {
1093 /* Computing MAX */
1094                     i__2 = maxwrk, i__3 = (*n << 1) + lwork_cungbr_p__;
1095                     maxwrk = f2cmax(i__2,i__3);
1096                 }
1097                 minwrk = (*n << 1) + *m;
1098             }
1099         } else if (minmn > 0) {
1100
1101 /*           Space needed for CBDSQR is BDSPAC = 5*M */
1102
1103 /* Writing concatenation */
1104             i__1[0] = 1, a__1[0] = jobu;
1105             i__1[1] = 1, a__1[1] = jobvt;
1106             s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
1107             mnthr = ilaenv_(&c__6, "CGESVD", ch__1, m, n, &c__0, &c__0, (
1108                     ftnlen)6, (ftnlen)2);
1109 /*           Compute space needed for CGELQF */
1110             cgelqf_(m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
1111             lwork_cgelqf__ = (integer) cdum[0].r;
1112 /*           Compute space needed for CUNGLQ */
1113             cunglq_(n, n, m, cdum, n, cdum, cdum, &c_n1, &ierr);
1114             lwork_cunglq_n__ = (integer) cdum[0].r;
1115             cunglq_(m, n, m, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr);
1116             lwork_cunglq_m__ = (integer) cdum[0].r;
1117 /*           Compute space needed for CGEBRD */
1118             cgebrd_(m, m, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, &
1119                     c_n1, &ierr);
1120             lwork_cgebrd__ = (integer) cdum[0].r;
1121 /*            Compute space needed for CUNGBR P */
1122             cungbr_("P", m, m, m, &a[a_offset], n, cdum, cdum, &c_n1, &ierr);
1123             lwork_cungbr_p__ = (integer) cdum[0].r;
1124 /*           Compute space needed for CUNGBR Q */
1125             cungbr_("Q", m, m, m, &a[a_offset], n, cdum, cdum, &c_n1, &ierr);
1126             lwork_cungbr_q__ = (integer) cdum[0].r;
1127             if (*n >= mnthr) {
1128                 if (wntvn) {
1129
1130 /*                 Path 1t(N much larger than M, JOBVT='N') */
1131
1132                     maxwrk = *m + lwork_cgelqf__;
1133 /* Computing MAX */
1134                     i__2 = maxwrk, i__3 = (*m << 1) + lwork_cgebrd__;
1135                     maxwrk = f2cmax(i__2,i__3);
1136                     if (wntuo || wntuas) {
1137 /* Computing MAX */
1138                         i__2 = maxwrk, i__3 = (*m << 1) + lwork_cungbr_q__;
1139                         maxwrk = f2cmax(i__2,i__3);
1140                     }
1141                     minwrk = *m * 3;
1142                 } else if (wntvo && wntun) {
1143
1144 /*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
1145
1146                     wrkbl = *m + lwork_cgelqf__;
1147 /* Computing MAX */
1148                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_m__;
1149                     wrkbl = f2cmax(i__2,i__3);
1150 /* Computing MAX */
1151                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1152                     wrkbl = f2cmax(i__2,i__3);
1153 /* Computing MAX */
1154                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1155                     wrkbl = f2cmax(i__2,i__3);
1156 /* Computing MAX */
1157                     i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n;
1158                     maxwrk = f2cmax(i__2,i__3);
1159                     minwrk = (*m << 1) + *n;
1160                 } else if (wntvo && wntuas) {
1161
1162 /*                 Path 3t(N much larger than M, JOBU='S' or 'A', */
1163 /*                 JOBVT='O') */
1164
1165                     wrkbl = *m + lwork_cgelqf__;
1166 /* Computing MAX */
1167                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_m__;
1168                     wrkbl = f2cmax(i__2,i__3);
1169 /* Computing MAX */
1170                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1171                     wrkbl = f2cmax(i__2,i__3);
1172 /* Computing MAX */
1173                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1174                     wrkbl = f2cmax(i__2,i__3);
1175 /* Computing MAX */
1176                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_q__;
1177                     wrkbl = f2cmax(i__2,i__3);
1178 /* Computing MAX */
1179                     i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n;
1180                     maxwrk = f2cmax(i__2,i__3);
1181                     minwrk = (*m << 1) + *n;
1182                 } else if (wntvs && wntun) {
1183
1184 /*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
1185
1186                     wrkbl = *m + lwork_cgelqf__;
1187 /* Computing MAX */
1188                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_m__;
1189                     wrkbl = f2cmax(i__2,i__3);
1190 /* Computing MAX */
1191                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1192                     wrkbl = f2cmax(i__2,i__3);
1193 /* Computing MAX */
1194                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1195                     wrkbl = f2cmax(i__2,i__3);
1196                     maxwrk = *m * *m + wrkbl;
1197                     minwrk = (*m << 1) + *n;
1198                 } else if (wntvs && wntuo) {
1199
1200 /*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
1201
1202                     wrkbl = *m + lwork_cgelqf__;
1203 /* Computing MAX */
1204                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_m__;
1205                     wrkbl = f2cmax(i__2,i__3);
1206 /* Computing MAX */
1207                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1208                     wrkbl = f2cmax(i__2,i__3);
1209 /* Computing MAX */
1210                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1211                     wrkbl = f2cmax(i__2,i__3);
1212 /* Computing MAX */
1213                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_q__;
1214                     wrkbl = f2cmax(i__2,i__3);
1215                     maxwrk = (*m << 1) * *m + wrkbl;
1216                     minwrk = (*m << 1) + *n;
1217                 } else if (wntvs && wntuas) {
1218
1219 /*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
1220 /*                 JOBVT='S') */
1221
1222                     wrkbl = *m + lwork_cgelqf__;
1223 /* Computing MAX */
1224                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_m__;
1225                     wrkbl = f2cmax(i__2,i__3);
1226 /* Computing MAX */
1227                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1228                     wrkbl = f2cmax(i__2,i__3);
1229 /* Computing MAX */
1230                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1231                     wrkbl = f2cmax(i__2,i__3);
1232 /* Computing MAX */
1233                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_q__;
1234                     wrkbl = f2cmax(i__2,i__3);
1235                     maxwrk = *m * *m + wrkbl;
1236                     minwrk = (*m << 1) + *n;
1237                 } else if (wntva && wntun) {
1238
1239 /*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
1240
1241                     wrkbl = *m + lwork_cgelqf__;
1242 /* Computing MAX */
1243                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_n__;
1244                     wrkbl = f2cmax(i__2,i__3);
1245 /* Computing MAX */
1246                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1247                     wrkbl = f2cmax(i__2,i__3);
1248 /* Computing MAX */
1249                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1250                     wrkbl = f2cmax(i__2,i__3);
1251                     maxwrk = *m * *m + wrkbl;
1252                     minwrk = (*m << 1) + *n;
1253                 } else if (wntva && wntuo) {
1254
1255 /*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
1256
1257                     wrkbl = *m + lwork_cgelqf__;
1258 /* Computing MAX */
1259                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_n__;
1260                     wrkbl = f2cmax(i__2,i__3);
1261 /* Computing MAX */
1262                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1263                     wrkbl = f2cmax(i__2,i__3);
1264 /* Computing MAX */
1265                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1266                     wrkbl = f2cmax(i__2,i__3);
1267 /* Computing MAX */
1268                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_q__;
1269                     wrkbl = f2cmax(i__2,i__3);
1270                     maxwrk = (*m << 1) * *m + wrkbl;
1271                     minwrk = (*m << 1) + *n;
1272                 } else if (wntva && wntuas) {
1273
1274 /*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
1275 /*                 JOBVT='A') */
1276
1277                     wrkbl = *m + lwork_cgelqf__;
1278 /* Computing MAX */
1279                     i__2 = wrkbl, i__3 = *m + lwork_cunglq_n__;
1280                     wrkbl = f2cmax(i__2,i__3);
1281 /* Computing MAX */
1282                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cgebrd__;
1283                     wrkbl = f2cmax(i__2,i__3);
1284 /* Computing MAX */
1285                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_p__;
1286                     wrkbl = f2cmax(i__2,i__3);
1287 /* Computing MAX */
1288                     i__2 = wrkbl, i__3 = (*m << 1) + lwork_cungbr_q__;
1289                     wrkbl = f2cmax(i__2,i__3);
1290                     maxwrk = *m * *m + wrkbl;
1291                     minwrk = (*m << 1) + *n;
1292                 }
1293             } else {
1294
1295 /*              Path 10t(N greater than M, but not much larger) */
1296
1297                 cgebrd_(m, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum,
1298                          &c_n1, &ierr);
1299                 lwork_cgebrd__ = (integer) cdum[0].r;
1300                 maxwrk = (*m << 1) + lwork_cgebrd__;
1301                 if (wntvs || wntvo) {
1302 /*                Compute space needed for CUNGBR P */
1303                     cungbr_("P", m, n, m, &a[a_offset], n, cdum, cdum, &c_n1, 
1304                             &ierr);
1305                     lwork_cungbr_p__ = (integer) cdum[0].r;
1306 /* Computing MAX */
1307                     i__2 = maxwrk, i__3 = (*m << 1) + lwork_cungbr_p__;
1308                     maxwrk = f2cmax(i__2,i__3);
1309                 }
1310                 if (wntva) {
1311                     cungbr_("P", n, n, m, &a[a_offset], n, cdum, cdum, &c_n1, 
1312                             &ierr);
1313                     lwork_cungbr_p__ = (integer) cdum[0].r;
1314 /* Computing MAX */
1315                     i__2 = maxwrk, i__3 = (*m << 1) + lwork_cungbr_p__;
1316                     maxwrk = f2cmax(i__2,i__3);
1317                 }
1318                 if (! wntun) {
1319 /* Computing MAX */
1320                     i__2 = maxwrk, i__3 = (*m << 1) + lwork_cungbr_q__;
1321                     maxwrk = f2cmax(i__2,i__3);
1322                 }
1323                 minwrk = (*m << 1) + *n;
1324             }
1325         }
1326         maxwrk = f2cmax(minwrk,maxwrk);
1327         work[1].r = (real) maxwrk, work[1].i = 0.f;
1328
1329         if (*lwork < minwrk && ! lquery) {
1330             *info = -13;
1331         }
1332     }
1333
1334     if (*info != 0) {
1335         i__2 = -(*info);
1336         xerbla_("CGESVD", &i__2, (ftnlen)6);
1337         return 0;
1338     } else if (lquery) {
1339         return 0;
1340     }
1341
1342 /*     Quick return if possible */
1343
1344     if (*m == 0 || *n == 0) {
1345         return 0;
1346     }
1347
1348 /*     Get machine constants */
1349
1350     eps = slamch_("P");
1351     smlnum = sqrt(slamch_("S")) / eps;
1352     bignum = 1.f / smlnum;
1353
1354 /*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
1355
1356     anrm = clange_("M", m, n, &a[a_offset], lda, dum);
1357     iscl = 0;
1358     if (anrm > 0.f && anrm < smlnum) {
1359         iscl = 1;
1360         clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
1361                 ierr);
1362     } else if (anrm > bignum) {
1363         iscl = 1;
1364         clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
1365                 ierr);
1366     }
1367
1368     if (*m >= *n) {
1369
1370 /*        A has at least as many rows as columns. If A has sufficiently */
1371 /*        more rows than columns, first reduce using the QR */
1372 /*        decomposition (if sufficient workspace available) */
1373
1374         if (*m >= mnthr) {
1375
1376             if (wntun) {
1377
1378 /*              Path 1 (M much larger than N, JOBU='N') */
1379 /*              No left singular vectors to be computed */
1380
1381                 itau = 1;
1382                 iwork = itau + *n;
1383
1384 /*              Compute A=Q*R */
1385 /*              (CWorkspace: need 2*N, prefer N+N*NB) */
1386 /*              (RWorkspace: need 0) */
1387
1388                 i__2 = *lwork - iwork + 1;
1389                 cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
1390                         i__2, &ierr);
1391
1392 /*              Zero out below R */
1393
1394                 if (*n > 1) {
1395                     i__2 = *n - 1;
1396                     i__3 = *n - 1;
1397                     claset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[a_dim1 + 2], 
1398                             lda);
1399                 }
1400                 ie = 1;
1401                 itauq = 1;
1402                 itaup = itauq + *n;
1403                 iwork = itaup + *n;
1404
1405 /*              Bidiagonalize R in A */
1406 /*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
1407 /*              (RWorkspace: need N) */
1408
1409                 i__2 = *lwork - iwork + 1;
1410                 cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
1411                         itauq], &work[itaup], &work[iwork], &i__2, &ierr);
1412                 ncvt = 0;
1413                 if (wntvo || wntvas) {
1414
1415 /*                 If right singular vectors desired, generate P'. */
1416 /*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
1417 /*                 (RWorkspace: 0) */
1418
1419                     i__2 = *lwork - iwork + 1;
1420                     cungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
1421                             work[iwork], &i__2, &ierr);
1422                     ncvt = *n;
1423                 }
1424                 irwork = ie + *n;
1425
1426 /*              Perform bidiagonal QR iteration, computing right */
1427 /*              singular vectors of A in A if desired */
1428 /*              (CWorkspace: 0) */
1429 /*              (RWorkspace: need BDSPAC) */
1430
1431                 cbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &rwork[ie], &a[
1432                         a_offset], lda, cdum, &c__1, cdum, &c__1, &rwork[
1433                         irwork], info);
1434
1435 /*              If right singular vectors desired in VT, copy them there */
1436
1437                 if (wntvas) {
1438                     clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 
1439                             ldvt);
1440                 }
1441
1442             } else if (wntuo && wntvn) {
1443
1444 /*              Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
1445 /*              N left singular vectors to be overwritten on A and */
1446 /*              no right singular vectors to be computed */
1447
1448                 if (*lwork >= *n * *n + *n * 3) {
1449
1450 /*                 Sufficient workspace for a fast algorithm */
1451
1452                     ir = 1;
1453 /* Computing MAX */
1454                     i__2 = wrkbl, i__3 = *lda * *n;
1455                     if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
1456
1457 /*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N */
1458
1459                         ldwrku = *lda;
1460                         ldwrkr = *lda;
1461                     } else /* if(complicated condition) */ {
1462 /* Computing MAX */
1463                         i__2 = wrkbl, i__3 = *lda * *n;
1464                         if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
1465
1466 /*                    WORK(IU) is LDA by N, WORK(IR) is N by N */
1467
1468                             ldwrku = *lda;
1469                             ldwrkr = *n;
1470                         } else {
1471
1472 /*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
1473
1474                             ldwrku = (*lwork - *n * *n) / *n;
1475                             ldwrkr = *n;
1476                         }
1477                     }
1478                     itau = ir + ldwrkr * *n;
1479                     iwork = itau + *n;
1480
1481 /*                 Compute A=Q*R */
1482 /*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1483 /*                 (RWorkspace: 0) */
1484
1485                     i__2 = *lwork - iwork + 1;
1486                     cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1487                             , &i__2, &ierr);
1488
1489 /*                 Copy R to WORK(IR) and zero out below it */
1490
1491                     clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
1492                     i__2 = *n - 1;
1493                     i__3 = *n - 1;
1494                     claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1], &
1495                             ldwrkr);
1496
1497 /*                 Generate Q in A */
1498 /*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1499 /*                 (RWorkspace: 0) */
1500
1501                     i__2 = *lwork - iwork + 1;
1502                     cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1503                             iwork], &i__2, &ierr);
1504                     ie = 1;
1505                     itauq = itau;
1506                     itaup = itauq + *n;
1507                     iwork = itaup + *n;
1508
1509 /*                 Bidiagonalize R in WORK(IR) */
1510 /*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
1511 /*                 (RWorkspace: need N) */
1512
1513                     i__2 = *lwork - iwork + 1;
1514                     cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
1515                             work[itauq], &work[itaup], &work[iwork], &i__2, &
1516                             ierr);
1517
1518 /*                 Generate left vectors bidiagonalizing R */
1519 /*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
1520 /*                 (RWorkspace: need 0) */
1521
1522                     i__2 = *lwork - iwork + 1;
1523                     cungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1524                             work[iwork], &i__2, &ierr);
1525                     irwork = ie + *n;
1526
1527 /*                 Perform bidiagonal QR iteration, computing left */
1528 /*                 singular vectors of R in WORK(IR) */
1529 /*                 (CWorkspace: need N*N) */
1530 /*                 (RWorkspace: need BDSPAC) */
1531
1532                     cbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], cdum, 
1533                             &c__1, &work[ir], &ldwrkr, cdum, &c__1, &rwork[
1534                             irwork], info);
1535                     iu = itauq;
1536
1537 /*                 Multiply Q in A by left singular vectors of R in */
1538 /*                 WORK(IR), storing result in WORK(IU) and copying to A */
1539 /*                 (CWorkspace: need N*N+N, prefer N*N+M*N) */
1540 /*                 (RWorkspace: 0) */
1541
1542                     i__2 = *m;
1543                     i__3 = ldwrku;
1544                     for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
1545                              i__3) {
1546 /* Computing MIN */
1547                         i__4 = *m - i__ + 1;
1548                         chunk = f2cmin(i__4,ldwrku);
1549                         cgemm_("N", "N", &chunk, n, n, &c_b2, &a[i__ + a_dim1]
1550                                 , lda, &work[ir], &ldwrkr, &c_b1, &work[iu], &
1551                                 ldwrku);
1552                         clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
1553                                 a_dim1], lda);
1554 /* L10: */
1555                     }
1556
1557                 } else {
1558
1559 /*                 Insufficient workspace for a fast algorithm */
1560
1561                     ie = 1;
1562                     itauq = 1;
1563                     itaup = itauq + *n;
1564                     iwork = itaup + *n;
1565
1566 /*                 Bidiagonalize A */
1567 /*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) */
1568 /*                 (RWorkspace: N) */
1569
1570                     i__3 = *lwork - iwork + 1;
1571                     cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
1572                             itauq], &work[itaup], &work[iwork], &i__3, &ierr);
1573
1574 /*                 Generate left vectors bidiagonalizing A */
1575 /*                 (CWorkspace: need 3*N, prefer 2*N+N*NB) */
1576 /*                 (RWorkspace: 0) */
1577
1578                     i__3 = *lwork - iwork + 1;
1579                     cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
1580                             work[iwork], &i__3, &ierr);
1581                     irwork = ie + *n;
1582
1583 /*                 Perform bidiagonal QR iteration, computing left */
1584 /*                 singular vectors of A in A */
1585 /*                 (CWorkspace: need 0) */
1586 /*                 (RWorkspace: need BDSPAC) */
1587
1588                     cbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], cdum, 
1589                             &c__1, &a[a_offset], lda, cdum, &c__1, &rwork[
1590                             irwork], info);
1591
1592                 }
1593
1594             } else if (wntuo && wntvas) {
1595
1596 /*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */
1597 /*              N left singular vectors to be overwritten on A and */
1598 /*              N right singular vectors to be computed in VT */
1599
1600                 if (*lwork >= *n * *n + *n * 3) {
1601
1602 /*                 Sufficient workspace for a fast algorithm */
1603
1604                     ir = 1;
1605 /* Computing MAX */
1606                     i__3 = wrkbl, i__2 = *lda * *n;
1607                     if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
1608
1609 /*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1610
1611                         ldwrku = *lda;
1612                         ldwrkr = *lda;
1613                     } else /* if(complicated condition) */ {
1614 /* Computing MAX */
1615                         i__3 = wrkbl, i__2 = *lda * *n;
1616                         if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
1617
1618 /*                    WORK(IU) is LDA by N and WORK(IR) is N by N */
1619
1620                             ldwrku = *lda;
1621                             ldwrkr = *n;
1622                         } else {
1623
1624 /*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
1625
1626                             ldwrku = (*lwork - *n * *n) / *n;
1627                             ldwrkr = *n;
1628                         }
1629                     }
1630                     itau = ir + ldwrkr * *n;
1631                     iwork = itau + *n;
1632
1633 /*                 Compute A=Q*R */
1634 /*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1635 /*                 (RWorkspace: 0) */
1636
1637                     i__3 = *lwork - iwork + 1;
1638                     cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1639                             , &i__3, &ierr);
1640
1641 /*                 Copy R to VT, zeroing out below it */
1642
1643                     clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
1644                             ldvt);
1645                     if (*n > 1) {
1646                         i__3 = *n - 1;
1647                         i__2 = *n - 1;
1648                         claset_("L", &i__3, &i__2, &c_b1, &c_b1, &vt[vt_dim1 
1649                                 + 2], ldvt);
1650                     }
1651
1652 /*                 Generate Q in A */
1653 /*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1654 /*                 (RWorkspace: 0) */
1655
1656                     i__3 = *lwork - iwork + 1;
1657                     cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1658                             iwork], &i__3, &ierr);
1659                     ie = 1;
1660                     itauq = itau;
1661                     itaup = itauq + *n;
1662                     iwork = itaup + *n;
1663
1664 /*                 Bidiagonalize R in VT, copying result to WORK(IR) */
1665 /*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
1666 /*                 (RWorkspace: need N) */
1667
1668                     i__3 = *lwork - iwork + 1;
1669                     cgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], &
1670                             work[itauq], &work[itaup], &work[iwork], &i__3, &
1671                             ierr);
1672                     clacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
1673                             ldwrkr);
1674
1675 /*                 Generate left vectors bidiagonalizing R in WORK(IR) */
1676 /*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
1677 /*                 (RWorkspace: 0) */
1678
1679                     i__3 = *lwork - iwork + 1;
1680                     cungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1681                             work[iwork], &i__3, &ierr);
1682
1683 /*                 Generate right vectors bidiagonalizing R in VT */
1684 /*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) */
1685 /*                 (RWorkspace: 0) */
1686
1687                     i__3 = *lwork - iwork + 1;
1688                     cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
1689                             &work[iwork], &i__3, &ierr);
1690                     irwork = ie + *n;
1691
1692 /*                 Perform bidiagonal QR iteration, computing left */
1693 /*                 singular vectors of R in WORK(IR) and computing right */
1694 /*                 singular vectors of R in VT */
1695 /*                 (CWorkspace: need N*N) */
1696 /*                 (RWorkspace: need BDSPAC) */
1697
1698                     cbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[
1699                             vt_offset], ldvt, &work[ir], &ldwrkr, cdum, &c__1,
1700                              &rwork[irwork], info);
1701                     iu = itauq;
1702
1703 /*                 Multiply Q in A by left singular vectors of R in */
1704 /*                 WORK(IR), storing result in WORK(IU) and copying to A */
1705 /*                 (CWorkspace: need N*N+N, prefer N*N+M*N) */
1706 /*                 (RWorkspace: 0) */
1707
1708                     i__3 = *m;
1709                     i__2 = ldwrku;
1710                     for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
1711                              i__2) {
1712 /* Computing MIN */
1713                         i__4 = *m - i__ + 1;
1714                         chunk = f2cmin(i__4,ldwrku);
1715                         cgemm_("N", "N", &chunk, n, n, &c_b2, &a[i__ + a_dim1]
1716                                 , lda, &work[ir], &ldwrkr, &c_b1, &work[iu], &
1717                                 ldwrku);
1718                         clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
1719                                 a_dim1], lda);
1720 /* L20: */
1721                     }
1722
1723                 } else {
1724
1725 /*                 Insufficient workspace for a fast algorithm */
1726
1727                     itau = 1;
1728                     iwork = itau + *n;
1729
1730 /*                 Compute A=Q*R */
1731 /*                 (CWorkspace: need 2*N, prefer N+N*NB) */
1732 /*                 (RWorkspace: 0) */
1733
1734                     i__2 = *lwork - iwork + 1;
1735                     cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1736                             , &i__2, &ierr);
1737
1738 /*                 Copy R to VT, zeroing out below it */
1739
1740                     clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
1741                             ldvt);
1742                     if (*n > 1) {
1743                         i__2 = *n - 1;
1744                         i__3 = *n - 1;
1745                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[vt_dim1 
1746                                 + 2], ldvt);
1747                     }
1748
1749 /*                 Generate Q in A */
1750 /*                 (CWorkspace: need 2*N, prefer N+N*NB) */
1751 /*                 (RWorkspace: 0) */
1752
1753                     i__2 = *lwork - iwork + 1;
1754                     cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1755                             iwork], &i__2, &ierr);
1756                     ie = 1;
1757                     itauq = itau;
1758                     itaup = itauq + *n;
1759                     iwork = itaup + *n;
1760
1761 /*                 Bidiagonalize R in VT */
1762 /*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
1763 /*                 (RWorkspace: N) */
1764
1765                     i__2 = *lwork - iwork + 1;
1766                     cgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], &
1767                             work[itauq], &work[itaup], &work[iwork], &i__2, &
1768                             ierr);
1769
1770 /*                 Multiply Q in A by left vectors bidiagonalizing R */
1771 /*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
1772 /*                 (RWorkspace: 0) */
1773
1774                     i__2 = *lwork - iwork + 1;
1775                     cunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
1776                             work[itauq], &a[a_offset], lda, &work[iwork], &
1777                             i__2, &ierr);
1778
1779 /*                 Generate right vectors bidiagonalizing R in VT */
1780 /*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
1781 /*                 (RWorkspace: 0) */
1782
1783                     i__2 = *lwork - iwork + 1;
1784                     cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
1785                             &work[iwork], &i__2, &ierr);
1786                     irwork = ie + *n;
1787
1788 /*                 Perform bidiagonal QR iteration, computing left */
1789 /*                 singular vectors of A in A and computing right */
1790 /*                 singular vectors of A in VT */
1791 /*                 (CWorkspace: 0) */
1792 /*                 (RWorkspace: need BDSPAC) */
1793
1794                     cbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[
1795                             vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, 
1796                             &rwork[irwork], info);
1797
1798                 }
1799
1800             } else if (wntus) {
1801
1802                 if (wntvn) {
1803
1804 /*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
1805 /*                 N left singular vectors to be computed in U and */
1806 /*                 no right singular vectors to be computed */
1807
1808                     if (*lwork >= *n * *n + *n * 3) {
1809
1810 /*                    Sufficient workspace for a fast algorithm */
1811
1812                         ir = 1;
1813                         if (*lwork >= wrkbl + *lda * *n) {
1814
1815 /*                       WORK(IR) is LDA by N */
1816
1817                             ldwrkr = *lda;
1818                         } else {
1819
1820 /*                       WORK(IR) is N by N */
1821
1822                             ldwrkr = *n;
1823                         }
1824                         itau = ir + ldwrkr * *n;
1825                         iwork = itau + *n;
1826
1827 /*                    Compute A=Q*R */
1828 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1829 /*                    (RWorkspace: 0) */
1830
1831                         i__2 = *lwork - iwork + 1;
1832                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1833                                 iwork], &i__2, &ierr);
1834
1835 /*                    Copy R to WORK(IR), zeroing out below it */
1836
1837                         clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
1838                                 ldwrkr);
1839                         i__2 = *n - 1;
1840                         i__3 = *n - 1;
1841                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1]
1842                                 , &ldwrkr);
1843
1844 /*                    Generate Q in A */
1845 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
1846 /*                    (RWorkspace: 0) */
1847
1848                         i__2 = *lwork - iwork + 1;
1849                         cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &
1850                                 work[iwork], &i__2, &ierr);
1851                         ie = 1;
1852                         itauq = itau;
1853                         itaup = itauq + *n;
1854                         iwork = itaup + *n;
1855
1856 /*                    Bidiagonalize R in WORK(IR) */
1857 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
1858 /*                    (RWorkspace: need N) */
1859
1860                         i__2 = *lwork - iwork + 1;
1861                         cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
1862                                 work[itauq], &work[itaup], &work[iwork], &
1863                                 i__2, &ierr);
1864
1865 /*                    Generate left vectors bidiagonalizing R in WORK(IR) */
1866 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
1867 /*                    (RWorkspace: 0) */
1868
1869                         i__2 = *lwork - iwork + 1;
1870                         cungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
1871                                 , &work[iwork], &i__2, &ierr);
1872                         irwork = ie + *n;
1873
1874 /*                    Perform bidiagonal QR iteration, computing left */
1875 /*                    singular vectors of R in WORK(IR) */
1876 /*                    (CWorkspace: need N*N) */
1877 /*                    (RWorkspace: need BDSPAC) */
1878
1879                         cbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], 
1880                                 cdum, &c__1, &work[ir], &ldwrkr, cdum, &c__1, 
1881                                 &rwork[irwork], info);
1882
1883 /*                    Multiply Q in A by left singular vectors of R in */
1884 /*                    WORK(IR), storing result in U */
1885 /*                    (CWorkspace: need N*N) */
1886 /*                    (RWorkspace: 0) */
1887
1888                         cgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, &
1889                                 work[ir], &ldwrkr, &c_b1, &u[u_offset], ldu);
1890
1891                     } else {
1892
1893 /*                    Insufficient workspace for a fast algorithm */
1894
1895                         itau = 1;
1896                         iwork = itau + *n;
1897
1898 /*                    Compute A=Q*R, copying result to U */
1899 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
1900 /*                    (RWorkspace: 0) */
1901
1902                         i__2 = *lwork - iwork + 1;
1903                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1904                                 iwork], &i__2, &ierr);
1905                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
1906                                 ldu);
1907
1908 /*                    Generate Q in U */
1909 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
1910 /*                    (RWorkspace: 0) */
1911
1912                         i__2 = *lwork - iwork + 1;
1913                         cungqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
1914                                 work[iwork], &i__2, &ierr);
1915                         ie = 1;
1916                         itauq = itau;
1917                         itaup = itauq + *n;
1918                         iwork = itaup + *n;
1919
1920 /*                    Zero out below R in A */
1921
1922                         if (*n > 1) {
1923                             i__2 = *n - 1;
1924                             i__3 = *n - 1;
1925                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[
1926                                     a_dim1 + 2], lda);
1927                         }
1928
1929 /*                    Bidiagonalize R in A */
1930 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
1931 /*                    (RWorkspace: need N) */
1932
1933                         i__2 = *lwork - iwork + 1;
1934                         cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &
1935                                 work[itauq], &work[itaup], &work[iwork], &
1936                                 i__2, &ierr);
1937
1938 /*                    Multiply Q in U by left vectors bidiagonalizing R */
1939 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
1940 /*                    (RWorkspace: 0) */
1941
1942                         i__2 = *lwork - iwork + 1;
1943                         cunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
1944                                 work[itauq], &u[u_offset], ldu, &work[iwork], 
1945                                 &i__2, &ierr)
1946                                 ;
1947                         irwork = ie + *n;
1948
1949 /*                    Perform bidiagonal QR iteration, computing left */
1950 /*                    singular vectors of A in U */
1951 /*                    (CWorkspace: 0) */
1952 /*                    (RWorkspace: need BDSPAC) */
1953
1954                         cbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], 
1955                                 cdum, &c__1, &u[u_offset], ldu, cdum, &c__1, &
1956                                 rwork[irwork], info);
1957
1958                     }
1959
1960                 } else if (wntvo) {
1961
1962 /*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
1963 /*                 N left singular vectors to be computed in U and */
1964 /*                 N right singular vectors to be overwritten on A */
1965
1966                     if (*lwork >= (*n << 1) * *n + *n * 3) {
1967
1968 /*                    Sufficient workspace for a fast algorithm */
1969
1970                         iu = 1;
1971                         if (*lwork >= wrkbl + (*lda << 1) * *n) {
1972
1973 /*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1974
1975                             ldwrku = *lda;
1976                             ir = iu + ldwrku * *n;
1977                             ldwrkr = *lda;
1978                         } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
1979
1980 /*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
1981
1982                             ldwrku = *lda;
1983                             ir = iu + ldwrku * *n;
1984                             ldwrkr = *n;
1985                         } else {
1986
1987 /*                       WORK(IU) is N by N and WORK(IR) is N by N */
1988
1989                             ldwrku = *n;
1990                             ir = iu + ldwrku * *n;
1991                             ldwrkr = *n;
1992                         }
1993                         itau = ir + ldwrkr * *n;
1994                         iwork = itau + *n;
1995
1996 /*                    Compute A=Q*R */
1997 /*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
1998 /*                    (RWorkspace: 0) */
1999
2000                         i__2 = *lwork - iwork + 1;
2001                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2002                                 iwork], &i__2, &ierr);
2003
2004 /*                    Copy R to WORK(IU), zeroing out below it */
2005
2006                         clacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2007                                 ldwrku);
2008                         i__2 = *n - 1;
2009                         i__3 = *n - 1;
2010                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1]
2011                                 , &ldwrku);
2012
2013 /*                    Generate Q in A */
2014 /*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2015 /*                    (RWorkspace: 0) */
2016
2017                         i__2 = *lwork - iwork + 1;
2018                         cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &
2019                                 work[iwork], &i__2, &ierr);
2020                         ie = 1;
2021                         itauq = itau;
2022                         itaup = itauq + *n;
2023                         iwork = itaup + *n;
2024
2025 /*                    Bidiagonalize R in WORK(IU), copying result to */
2026 /*                    WORK(IR) */
2027 /*                    (CWorkspace: need   2*N*N+3*N, */
2028 /*                                 prefer 2*N*N+2*N+2*N*NB) */
2029 /*                    (RWorkspace: need   N) */
2030
2031                         i__2 = *lwork - iwork + 1;
2032                         cgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], &
2033                                 work[itauq], &work[itaup], &work[iwork], &
2034                                 i__2, &ierr);
2035                         clacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
2036                                 ldwrkr);
2037
2038 /*                    Generate left bidiagonalizing vectors in WORK(IU) */
2039 /*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) */
2040 /*                    (RWorkspace: 0) */
2041
2042                         i__2 = *lwork - iwork + 1;
2043                         cungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2044                                 , &work[iwork], &i__2, &ierr);
2045
2046 /*                    Generate right bidiagonalizing vectors in WORK(IR) */
2047 /*                    (CWorkspace: need   2*N*N+3*N-1, */
2048 /*                                 prefer 2*N*N+2*N+(N-1)*NB) */
2049 /*                    (RWorkspace: 0) */
2050
2051                         i__2 = *lwork - iwork + 1;
2052                         cungbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
2053                                 , &work[iwork], &i__2, &ierr);
2054                         irwork = ie + *n;
2055
2056 /*                    Perform bidiagonal QR iteration, computing left */
2057 /*                    singular vectors of R in WORK(IU) and computing */
2058 /*                    right singular vectors of R in WORK(IR) */
2059 /*                    (CWorkspace: need 2*N*N) */
2060 /*                    (RWorkspace: need BDSPAC) */
2061
2062                         cbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &work[
2063                                 ir], &ldwrkr, &work[iu], &ldwrku, cdum, &c__1,
2064                                  &rwork[irwork], info);
2065
2066 /*                    Multiply Q in A by left singular vectors of R in */
2067 /*                    WORK(IU), storing result in U */
2068 /*                    (CWorkspace: need N*N) */
2069 /*                    (RWorkspace: 0) */
2070
2071                         cgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, &
2072                                 work[iu], &ldwrku, &c_b1, &u[u_offset], ldu);
2073
2074 /*                    Copy right singular vectors of R to A */
2075 /*                    (CWorkspace: need N*N) */
2076 /*                    (RWorkspace: 0) */
2077
2078                         clacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
2079                                 lda);
2080
2081                     } else {
2082
2083 /*                    Insufficient workspace for a fast algorithm */
2084
2085                         itau = 1;
2086                         iwork = itau + *n;
2087
2088 /*                    Compute A=Q*R, copying result to U */
2089 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2090 /*                    (RWorkspace: 0) */
2091
2092                         i__2 = *lwork - iwork + 1;
2093                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2094                                 iwork], &i__2, &ierr);
2095                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2096                                 ldu);
2097
2098 /*                    Generate Q in U */
2099 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2100 /*                    (RWorkspace: 0) */
2101
2102                         i__2 = *lwork - iwork + 1;
2103                         cungqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
2104                                 work[iwork], &i__2, &ierr);
2105                         ie = 1;
2106                         itauq = itau;
2107                         itaup = itauq + *n;
2108                         iwork = itaup + *n;
2109
2110 /*                    Zero out below R in A */
2111
2112                         if (*n > 1) {
2113                             i__2 = *n - 1;
2114                             i__3 = *n - 1;
2115                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[
2116                                     a_dim1 + 2], lda);
2117                         }
2118
2119 /*                    Bidiagonalize R in A */
2120 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
2121 /*                    (RWorkspace: need N) */
2122
2123                         i__2 = *lwork - iwork + 1;
2124                         cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &
2125                                 work[itauq], &work[itaup], &work[iwork], &
2126                                 i__2, &ierr);
2127
2128 /*                    Multiply Q in U by left vectors bidiagonalizing R */
2129 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
2130 /*                    (RWorkspace: 0) */
2131
2132                         i__2 = *lwork - iwork + 1;
2133                         cunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2134                                 work[itauq], &u[u_offset], ldu, &work[iwork], 
2135                                 &i__2, &ierr)
2136                                 ;
2137
2138 /*                    Generate right vectors bidiagonalizing R in A */
2139 /*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2140 /*                    (RWorkspace: 0) */
2141
2142                         i__2 = *lwork - iwork + 1;
2143                         cungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
2144                                  &work[iwork], &i__2, &ierr);
2145                         irwork = ie + *n;
2146
2147 /*                    Perform bidiagonal QR iteration, computing left */
2148 /*                    singular vectors of A in U and computing right */
2149 /*                    singular vectors of A in A */
2150 /*                    (CWorkspace: 0) */
2151 /*                    (RWorkspace: need BDSPAC) */
2152
2153                         cbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &a[
2154                                 a_offset], lda, &u[u_offset], ldu, cdum, &
2155                                 c__1, &rwork[irwork], info);
2156
2157                     }
2158
2159                 } else if (wntvas) {
2160
2161 /*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' */
2162 /*                         or 'A') */
2163 /*                 N left singular vectors to be computed in U and */
2164 /*                 N right singular vectors to be computed in VT */
2165
2166                     if (*lwork >= *n * *n + *n * 3) {
2167
2168 /*                    Sufficient workspace for a fast algorithm */
2169
2170                         iu = 1;
2171                         if (*lwork >= wrkbl + *lda * *n) {
2172
2173 /*                       WORK(IU) is LDA by N */
2174
2175                             ldwrku = *lda;
2176                         } else {
2177
2178 /*                       WORK(IU) is N by N */
2179
2180                             ldwrku = *n;
2181                         }
2182                         itau = iu + ldwrku * *n;
2183                         iwork = itau + *n;
2184
2185 /*                    Compute A=Q*R */
2186 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
2187 /*                    (RWorkspace: 0) */
2188
2189                         i__2 = *lwork - iwork + 1;
2190                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2191                                 iwork], &i__2, &ierr);
2192
2193 /*                    Copy R to WORK(IU), zeroing out below it */
2194
2195                         clacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2196                                 ldwrku);
2197                         i__2 = *n - 1;
2198                         i__3 = *n - 1;
2199                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1]
2200                                 , &ldwrku);
2201
2202 /*                    Generate Q in A */
2203 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
2204 /*                    (RWorkspace: 0) */
2205
2206                         i__2 = *lwork - iwork + 1;
2207                         cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &
2208                                 work[iwork], &i__2, &ierr);
2209                         ie = 1;
2210                         itauq = itau;
2211                         itaup = itauq + *n;
2212                         iwork = itaup + *n;
2213
2214 /*                    Bidiagonalize R in WORK(IU), copying result to VT */
2215 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
2216 /*                    (RWorkspace: need N) */
2217
2218                         i__2 = *lwork - iwork + 1;
2219                         cgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], &
2220                                 work[itauq], &work[itaup], &work[iwork], &
2221                                 i__2, &ierr);
2222                         clacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
2223                                  ldvt);
2224
2225 /*                    Generate left bidiagonalizing vectors in WORK(IU) */
2226 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
2227 /*                    (RWorkspace: 0) */
2228
2229                         i__2 = *lwork - iwork + 1;
2230                         cungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2231                                 , &work[iwork], &i__2, &ierr);
2232
2233 /*                    Generate right bidiagonalizing vectors in VT */
2234 /*                    (CWorkspace: need   N*N+3*N-1, */
2235 /*                                 prefer N*N+2*N+(N-1)*NB) */
2236 /*                    (RWorkspace: 0) */
2237
2238                         i__2 = *lwork - iwork + 1;
2239                         cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2240                                 itaup], &work[iwork], &i__2, &ierr)
2241                                 ;
2242                         irwork = ie + *n;
2243
2244 /*                    Perform bidiagonal QR iteration, computing left */
2245 /*                    singular vectors of R in WORK(IU) and computing */
2246 /*                    right singular vectors of R in VT */
2247 /*                    (CWorkspace: need N*N) */
2248 /*                    (RWorkspace: need BDSPAC) */
2249
2250                         cbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[
2251                                 vt_offset], ldvt, &work[iu], &ldwrku, cdum, &
2252                                 c__1, &rwork[irwork], info);
2253
2254 /*                    Multiply Q in A by left singular vectors of R in */
2255 /*                    WORK(IU), storing result in U */
2256 /*                    (CWorkspace: need N*N) */
2257 /*                    (RWorkspace: 0) */
2258
2259                         cgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, &
2260                                 work[iu], &ldwrku, &c_b1, &u[u_offset], ldu);
2261
2262                     } else {
2263
2264 /*                    Insufficient workspace for a fast algorithm */
2265
2266                         itau = 1;
2267                         iwork = itau + *n;
2268
2269 /*                    Compute A=Q*R, copying result to U */
2270 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2271 /*                    (RWorkspace: 0) */
2272
2273                         i__2 = *lwork - iwork + 1;
2274                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2275                                 iwork], &i__2, &ierr);
2276                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2277                                 ldu);
2278
2279 /*                    Generate Q in U */
2280 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2281 /*                    (RWorkspace: 0) */
2282
2283                         i__2 = *lwork - iwork + 1;
2284                         cungqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
2285                                 work[iwork], &i__2, &ierr);
2286
2287 /*                    Copy R to VT, zeroing out below it */
2288
2289                         clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
2290                                 ldvt);
2291                         if (*n > 1) {
2292                             i__2 = *n - 1;
2293                             i__3 = *n - 1;
2294                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[
2295                                     vt_dim1 + 2], ldvt);
2296                         }
2297                         ie = 1;
2298                         itauq = itau;
2299                         itaup = itauq + *n;
2300                         iwork = itaup + *n;
2301
2302 /*                    Bidiagonalize R in VT */
2303 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
2304 /*                    (RWorkspace: need N) */
2305
2306                         i__2 = *lwork - iwork + 1;
2307                         cgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie],
2308                                  &work[itauq], &work[itaup], &work[iwork], &
2309                                 i__2, &ierr);
2310
2311 /*                    Multiply Q in U by left bidiagonalizing vectors */
2312 /*                    in VT */
2313 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
2314 /*                    (RWorkspace: 0) */
2315
2316                         i__2 = *lwork - iwork + 1;
2317                         cunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
2318                                 &work[itauq], &u[u_offset], ldu, &work[iwork],
2319                                  &i__2, &ierr);
2320
2321 /*                    Generate right bidiagonalizing vectors in VT */
2322 /*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2323 /*                    (RWorkspace: 0) */
2324
2325                         i__2 = *lwork - iwork + 1;
2326                         cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2327                                 itaup], &work[iwork], &i__2, &ierr)
2328                                 ;
2329                         irwork = ie + *n;
2330
2331 /*                    Perform bidiagonal QR iteration, computing left */
2332 /*                    singular vectors of A in U and computing right */
2333 /*                    singular vectors of A in VT */
2334 /*                    (CWorkspace: 0) */
2335 /*                    (RWorkspace: need BDSPAC) */
2336
2337                         cbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[
2338                                 vt_offset], ldvt, &u[u_offset], ldu, cdum, &
2339                                 c__1, &rwork[irwork], info);
2340
2341                     }
2342
2343                 }
2344
2345             } else if (wntua) {
2346
2347                 if (wntvn) {
2348
2349 /*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
2350 /*                 M left singular vectors to be computed in U and */
2351 /*                 no right singular vectors to be computed */
2352
2353 /* Computing MAX */
2354                     i__2 = *n + *m, i__3 = *n * 3;
2355                     if (*lwork >= *n * *n + f2cmax(i__2,i__3)) {
2356
2357 /*                    Sufficient workspace for a fast algorithm */
2358
2359                         ir = 1;
2360                         if (*lwork >= wrkbl + *lda * *n) {
2361
2362 /*                       WORK(IR) is LDA by N */
2363
2364                             ldwrkr = *lda;
2365                         } else {
2366
2367 /*                       WORK(IR) is N by N */
2368
2369                             ldwrkr = *n;
2370                         }
2371                         itau = ir + ldwrkr * *n;
2372                         iwork = itau + *n;
2373
2374 /*                    Compute A=Q*R, copying result to U */
2375 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
2376 /*                    (RWorkspace: 0) */
2377
2378                         i__2 = *lwork - iwork + 1;
2379                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2380                                 iwork], &i__2, &ierr);
2381                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2382                                 ldu);
2383
2384 /*                    Copy R to WORK(IR), zeroing out below it */
2385
2386                         clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
2387                                 ldwrkr);
2388                         i__2 = *n - 1;
2389                         i__3 = *n - 1;
2390                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1]
2391                                 , &ldwrkr);
2392
2393 /*                    Generate Q in U */
2394 /*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) */
2395 /*                    (RWorkspace: 0) */
2396
2397                         i__2 = *lwork - iwork + 1;
2398                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2399                                 work[iwork], &i__2, &ierr);
2400                         ie = 1;
2401                         itauq = itau;
2402                         itaup = itauq + *n;
2403                         iwork = itaup + *n;
2404
2405 /*                    Bidiagonalize R in WORK(IR) */
2406 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
2407 /*                    (RWorkspace: need N) */
2408
2409                         i__2 = *lwork - iwork + 1;
2410                         cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
2411                                 work[itauq], &work[itaup], &work[iwork], &
2412                                 i__2, &ierr);
2413
2414 /*                    Generate left bidiagonalizing vectors in WORK(IR) */
2415 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
2416 /*                    (RWorkspace: 0) */
2417
2418                         i__2 = *lwork - iwork + 1;
2419                         cungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
2420                                 , &work[iwork], &i__2, &ierr);
2421                         irwork = ie + *n;
2422
2423 /*                    Perform bidiagonal QR iteration, computing left */
2424 /*                    singular vectors of R in WORK(IR) */
2425 /*                    (CWorkspace: need N*N) */
2426 /*                    (RWorkspace: need BDSPAC) */
2427
2428                         cbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], 
2429                                 cdum, &c__1, &work[ir], &ldwrkr, cdum, &c__1, 
2430                                 &rwork[irwork], info);
2431
2432 /*                    Multiply Q in U by left singular vectors of R in */
2433 /*                    WORK(IR), storing result in A */
2434 /*                    (CWorkspace: need N*N) */
2435 /*                    (RWorkspace: 0) */
2436
2437                         cgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, &
2438                                 work[ir], &ldwrkr, &c_b1, &a[a_offset], lda);
2439
2440 /*                    Copy left singular vectors of A from A to U */
2441
2442                         clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2443                                 ldu);
2444
2445                     } else {
2446
2447 /*                    Insufficient workspace for a fast algorithm */
2448
2449                         itau = 1;
2450                         iwork = itau + *n;
2451
2452 /*                    Compute A=Q*R, copying result to U */
2453 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2454 /*                    (RWorkspace: 0) */
2455
2456                         i__2 = *lwork - iwork + 1;
2457                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2458                                 iwork], &i__2, &ierr);
2459                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2460                                 ldu);
2461
2462 /*                    Generate Q in U */
2463 /*                    (CWorkspace: need N+M, prefer N+M*NB) */
2464 /*                    (RWorkspace: 0) */
2465
2466                         i__2 = *lwork - iwork + 1;
2467                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2468                                 work[iwork], &i__2, &ierr);
2469                         ie = 1;
2470                         itauq = itau;
2471                         itaup = itauq + *n;
2472                         iwork = itaup + *n;
2473
2474 /*                    Zero out below R in A */
2475
2476                         if (*n > 1) {
2477                             i__2 = *n - 1;
2478                             i__3 = *n - 1;
2479                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[
2480                                     a_dim1 + 2], lda);
2481                         }
2482
2483 /*                    Bidiagonalize R in A */
2484 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
2485 /*                    (RWorkspace: need N) */
2486
2487                         i__2 = *lwork - iwork + 1;
2488                         cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &
2489                                 work[itauq], &work[itaup], &work[iwork], &
2490                                 i__2, &ierr);
2491
2492 /*                    Multiply Q in U by left bidiagonalizing vectors */
2493 /*                    in A */
2494 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
2495 /*                    (RWorkspace: 0) */
2496
2497                         i__2 = *lwork - iwork + 1;
2498                         cunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2499                                 work[itauq], &u[u_offset], ldu, &work[iwork], 
2500                                 &i__2, &ierr)
2501                                 ;
2502                         irwork = ie + *n;
2503
2504 /*                    Perform bidiagonal QR iteration, computing left */
2505 /*                    singular vectors of A in U */
2506 /*                    (CWorkspace: 0) */
2507 /*                    (RWorkspace: need BDSPAC) */
2508
2509                         cbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], 
2510                                 cdum, &c__1, &u[u_offset], ldu, cdum, &c__1, &
2511                                 rwork[irwork], info);
2512
2513                     }
2514
2515                 } else if (wntvo) {
2516
2517 /*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
2518 /*                 M left singular vectors to be computed in U and */
2519 /*                 N right singular vectors to be overwritten on A */
2520
2521 /* Computing MAX */
2522                     i__2 = *n + *m, i__3 = *n * 3;
2523                     if (*lwork >= (*n << 1) * *n + f2cmax(i__2,i__3)) {
2524
2525 /*                    Sufficient workspace for a fast algorithm */
2526
2527                         iu = 1;
2528                         if (*lwork >= wrkbl + (*lda << 1) * *n) {
2529
2530 /*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
2531
2532                             ldwrku = *lda;
2533                             ir = iu + ldwrku * *n;
2534                             ldwrkr = *lda;
2535                         } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
2536
2537 /*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
2538
2539                             ldwrku = *lda;
2540                             ir = iu + ldwrku * *n;
2541                             ldwrkr = *n;
2542                         } else {
2543
2544 /*                       WORK(IU) is N by N and WORK(IR) is N by N */
2545
2546                             ldwrku = *n;
2547                             ir = iu + ldwrku * *n;
2548                             ldwrkr = *n;
2549                         }
2550                         itau = ir + ldwrkr * *n;
2551                         iwork = itau + *n;
2552
2553 /*                    Compute A=Q*R, copying result to U */
2554 /*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2555 /*                    (RWorkspace: 0) */
2556
2557                         i__2 = *lwork - iwork + 1;
2558                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2559                                 iwork], &i__2, &ierr);
2560                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2561                                 ldu);
2562
2563 /*                    Generate Q in U */
2564 /*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */
2565 /*                    (RWorkspace: 0) */
2566
2567                         i__2 = *lwork - iwork + 1;
2568                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2569                                 work[iwork], &i__2, &ierr);
2570
2571 /*                    Copy R to WORK(IU), zeroing out below it */
2572
2573                         clacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2574                                 ldwrku);
2575                         i__2 = *n - 1;
2576                         i__3 = *n - 1;
2577                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1]
2578                                 , &ldwrku);
2579                         ie = 1;
2580                         itauq = itau;
2581                         itaup = itauq + *n;
2582                         iwork = itaup + *n;
2583
2584 /*                    Bidiagonalize R in WORK(IU), copying result to */
2585 /*                    WORK(IR) */
2586 /*                    (CWorkspace: need   2*N*N+3*N, */
2587 /*                                 prefer 2*N*N+2*N+2*N*NB) */
2588 /*                    (RWorkspace: need   N) */
2589
2590                         i__2 = *lwork - iwork + 1;
2591                         cgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], &
2592                                 work[itauq], &work[itaup], &work[iwork], &
2593                                 i__2, &ierr);
2594                         clacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
2595                                 ldwrkr);
2596
2597 /*                    Generate left bidiagonalizing vectors in WORK(IU) */
2598 /*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) */
2599 /*                    (RWorkspace: 0) */
2600
2601                         i__2 = *lwork - iwork + 1;
2602                         cungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2603                                 , &work[iwork], &i__2, &ierr);
2604
2605 /*                    Generate right bidiagonalizing vectors in WORK(IR) */
2606 /*                    (CWorkspace: need   2*N*N+3*N-1, */
2607 /*                                 prefer 2*N*N+2*N+(N-1)*NB) */
2608 /*                    (RWorkspace: 0) */
2609
2610                         i__2 = *lwork - iwork + 1;
2611                         cungbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
2612                                 , &work[iwork], &i__2, &ierr);
2613                         irwork = ie + *n;
2614
2615 /*                    Perform bidiagonal QR iteration, computing left */
2616 /*                    singular vectors of R in WORK(IU) and computing */
2617 /*                    right singular vectors of R in WORK(IR) */
2618 /*                    (CWorkspace: need 2*N*N) */
2619 /*                    (RWorkspace: need BDSPAC) */
2620
2621                         cbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &work[
2622                                 ir], &ldwrkr, &work[iu], &ldwrku, cdum, &c__1,
2623                                  &rwork[irwork], info);
2624
2625 /*                    Multiply Q in U by left singular vectors of R in */
2626 /*                    WORK(IU), storing result in A */
2627 /*                    (CWorkspace: need N*N) */
2628 /*                    (RWorkspace: 0) */
2629
2630                         cgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, &
2631                                 work[iu], &ldwrku, &c_b1, &a[a_offset], lda);
2632
2633 /*                    Copy left singular vectors of A from A to U */
2634
2635                         clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2636                                 ldu);
2637
2638 /*                    Copy right singular vectors of R from WORK(IR) to A */
2639
2640                         clacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
2641                                 lda);
2642
2643                     } else {
2644
2645 /*                    Insufficient workspace for a fast algorithm */
2646
2647                         itau = 1;
2648                         iwork = itau + *n;
2649
2650 /*                    Compute A=Q*R, copying result to U */
2651 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2652 /*                    (RWorkspace: 0) */
2653
2654                         i__2 = *lwork - iwork + 1;
2655                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2656                                 iwork], &i__2, &ierr);
2657                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2658                                 ldu);
2659
2660 /*                    Generate Q in U */
2661 /*                    (CWorkspace: need N+M, prefer N+M*NB) */
2662 /*                    (RWorkspace: 0) */
2663
2664                         i__2 = *lwork - iwork + 1;
2665                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2666                                 work[iwork], &i__2, &ierr);
2667                         ie = 1;
2668                         itauq = itau;
2669                         itaup = itauq + *n;
2670                         iwork = itaup + *n;
2671
2672 /*                    Zero out below R in A */
2673
2674                         if (*n > 1) {
2675                             i__2 = *n - 1;
2676                             i__3 = *n - 1;
2677                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[
2678                                     a_dim1 + 2], lda);
2679                         }
2680
2681 /*                    Bidiagonalize R in A */
2682 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
2683 /*                    (RWorkspace: need N) */
2684
2685                         i__2 = *lwork - iwork + 1;
2686                         cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &
2687                                 work[itauq], &work[itaup], &work[iwork], &
2688                                 i__2, &ierr);
2689
2690 /*                    Multiply Q in U by left bidiagonalizing vectors */
2691 /*                    in A */
2692 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
2693 /*                    (RWorkspace: 0) */
2694
2695                         i__2 = *lwork - iwork + 1;
2696                         cunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2697                                 work[itauq], &u[u_offset], ldu, &work[iwork], 
2698                                 &i__2, &ierr)
2699                                 ;
2700
2701 /*                    Generate right bidiagonalizing vectors in A */
2702 /*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2703 /*                    (RWorkspace: 0) */
2704
2705                         i__2 = *lwork - iwork + 1;
2706                         cungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
2707                                  &work[iwork], &i__2, &ierr);
2708                         irwork = ie + *n;
2709
2710 /*                    Perform bidiagonal QR iteration, computing left */
2711 /*                    singular vectors of A in U and computing right */
2712 /*                    singular vectors of A in A */
2713 /*                    (CWorkspace: 0) */
2714 /*                    (RWorkspace: need BDSPAC) */
2715
2716                         cbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &a[
2717                                 a_offset], lda, &u[u_offset], ldu, cdum, &
2718                                 c__1, &rwork[irwork], info);
2719
2720                     }
2721
2722                 } else if (wntvas) {
2723
2724 /*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' */
2725 /*                         or 'A') */
2726 /*                 M left singular vectors to be computed in U and */
2727 /*                 N right singular vectors to be computed in VT */
2728
2729 /* Computing MAX */
2730                     i__2 = *n + *m, i__3 = *n * 3;
2731                     if (*lwork >= *n * *n + f2cmax(i__2,i__3)) {
2732
2733 /*                    Sufficient workspace for a fast algorithm */
2734
2735                         iu = 1;
2736                         if (*lwork >= wrkbl + *lda * *n) {
2737
2738 /*                       WORK(IU) is LDA by N */
2739
2740                             ldwrku = *lda;
2741                         } else {
2742
2743 /*                       WORK(IU) is N by N */
2744
2745                             ldwrku = *n;
2746                         }
2747                         itau = iu + ldwrku * *n;
2748                         iwork = itau + *n;
2749
2750 /*                    Compute A=Q*R, copying result to U */
2751 /*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */
2752 /*                    (RWorkspace: 0) */
2753
2754                         i__2 = *lwork - iwork + 1;
2755                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2756                                 iwork], &i__2, &ierr);
2757                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2758                                 ldu);
2759
2760 /*                    Generate Q in U */
2761 /*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) */
2762 /*                    (RWorkspace: 0) */
2763
2764                         i__2 = *lwork - iwork + 1;
2765                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2766                                 work[iwork], &i__2, &ierr);
2767
2768 /*                    Copy R to WORK(IU), zeroing out below it */
2769
2770                         clacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2771                                 ldwrku);
2772                         i__2 = *n - 1;
2773                         i__3 = *n - 1;
2774                         claset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1]
2775                                 , &ldwrku);
2776                         ie = 1;
2777                         itauq = itau;
2778                         itaup = itauq + *n;
2779                         iwork = itaup + *n;
2780
2781 /*                    Bidiagonalize R in WORK(IU), copying result to VT */
2782 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */
2783 /*                    (RWorkspace: need N) */
2784
2785                         i__2 = *lwork - iwork + 1;
2786                         cgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], &
2787                                 work[itauq], &work[itaup], &work[iwork], &
2788                                 i__2, &ierr);
2789                         clacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
2790                                  ldvt);
2791
2792 /*                    Generate left bidiagonalizing vectors in WORK(IU) */
2793 /*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */
2794 /*                    (RWorkspace: 0) */
2795
2796                         i__2 = *lwork - iwork + 1;
2797                         cungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2798                                 , &work[iwork], &i__2, &ierr);
2799
2800 /*                    Generate right bidiagonalizing vectors in VT */
2801 /*                    (CWorkspace: need   N*N+3*N-1, */
2802 /*                                 prefer N*N+2*N+(N-1)*NB) */
2803 /*                    (RWorkspace: need   0) */
2804
2805                         i__2 = *lwork - iwork + 1;
2806                         cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2807                                 itaup], &work[iwork], &i__2, &ierr)
2808                                 ;
2809                         irwork = ie + *n;
2810
2811 /*                    Perform bidiagonal QR iteration, computing left */
2812 /*                    singular vectors of R in WORK(IU) and computing */
2813 /*                    right singular vectors of R in VT */
2814 /*                    (CWorkspace: need N*N) */
2815 /*                    (RWorkspace: need BDSPAC) */
2816
2817                         cbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[
2818                                 vt_offset], ldvt, &work[iu], &ldwrku, cdum, &
2819                                 c__1, &rwork[irwork], info);
2820
2821 /*                    Multiply Q in U by left singular vectors of R in */
2822 /*                    WORK(IU), storing result in A */
2823 /*                    (CWorkspace: need N*N) */
2824 /*                    (RWorkspace: 0) */
2825
2826                         cgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, &
2827                                 work[iu], &ldwrku, &c_b1, &a[a_offset], lda);
2828
2829 /*                    Copy left singular vectors of A from A to U */
2830
2831                         clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2832                                 ldu);
2833
2834                     } else {
2835
2836 /*                    Insufficient workspace for a fast algorithm */
2837
2838                         itau = 1;
2839                         iwork = itau + *n;
2840
2841 /*                    Compute A=Q*R, copying result to U */
2842 /*                    (CWorkspace: need 2*N, prefer N+N*NB) */
2843 /*                    (RWorkspace: 0) */
2844
2845                         i__2 = *lwork - iwork + 1;
2846                         cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2847                                 iwork], &i__2, &ierr);
2848                         clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2849                                 ldu);
2850
2851 /*                    Generate Q in U */
2852 /*                    (CWorkspace: need N+M, prefer N+M*NB) */
2853 /*                    (RWorkspace: 0) */
2854
2855                         i__2 = *lwork - iwork + 1;
2856                         cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2857                                 work[iwork], &i__2, &ierr);
2858
2859 /*                    Copy R from A to VT, zeroing out below it */
2860
2861                         clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
2862                                 ldvt);
2863                         if (*n > 1) {
2864                             i__2 = *n - 1;
2865                             i__3 = *n - 1;
2866                             claset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[
2867                                     vt_dim1 + 2], ldvt);
2868                         }
2869                         ie = 1;
2870                         itauq = itau;
2871                         itaup = itauq + *n;
2872                         iwork = itaup + *n;
2873
2874 /*                    Bidiagonalize R in VT */
2875 /*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */
2876 /*                    (RWorkspace: need N) */
2877
2878                         i__2 = *lwork - iwork + 1;
2879                         cgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie],
2880                                  &work[itauq], &work[itaup], &work[iwork], &
2881                                 i__2, &ierr);
2882
2883 /*                    Multiply Q in U by left bidiagonalizing vectors */
2884 /*                    in VT */
2885 /*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */
2886 /*                    (RWorkspace: 0) */
2887
2888                         i__2 = *lwork - iwork + 1;
2889                         cunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
2890                                 &work[itauq], &u[u_offset], ldu, &work[iwork],
2891                                  &i__2, &ierr);
2892
2893 /*                    Generate right bidiagonalizing vectors in VT */
2894 /*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2895 /*                    (RWorkspace: 0) */
2896
2897                         i__2 = *lwork - iwork + 1;
2898                         cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2899                                 itaup], &work[iwork], &i__2, &ierr)
2900                                 ;
2901                         irwork = ie + *n;
2902
2903 /*                    Perform bidiagonal QR iteration, computing left */
2904 /*                    singular vectors of A in U and computing right */
2905 /*                    singular vectors of A in VT */
2906 /*                    (CWorkspace: 0) */
2907 /*                    (RWorkspace: need BDSPAC) */
2908
2909                         cbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[
2910                                 vt_offset], ldvt, &u[u_offset], ldu, cdum, &
2911                                 c__1, &rwork[irwork], info);
2912
2913                     }
2914
2915                 }
2916
2917             }
2918
2919         } else {
2920
2921 /*           M .LT. MNTHR */
2922
2923 /*           Path 10 (M at least N, but not much larger) */
2924 /*           Reduce to bidiagonal form without QR decomposition */
2925
2926             ie = 1;
2927             itauq = 1;
2928             itaup = itauq + *n;
2929             iwork = itaup + *n;
2930
2931 /*           Bidiagonalize A */
2932 /*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) */
2933 /*           (RWorkspace: need N) */
2934
2935             i__2 = *lwork - iwork + 1;
2936             cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], 
2937                     &work[itaup], &work[iwork], &i__2, &ierr);
2938             if (wntuas) {
2939
2940 /*              If left singular vectors desired in U, copy result to U */
2941 /*              and generate left bidiagonalizing vectors in U */
2942 /*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) */
2943 /*              (RWorkspace: 0) */
2944
2945                 clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
2946                 if (wntus) {
2947                     ncu = *n;
2948                 }
2949                 if (wntua) {
2950                     ncu = *m;
2951                 }
2952                 i__2 = *lwork - iwork + 1;
2953                 cungbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
2954                         work[iwork], &i__2, &ierr);
2955             }
2956             if (wntvas) {
2957
2958 /*              If right singular vectors desired in VT, copy result to */
2959 /*              VT and generate right bidiagonalizing vectors in VT */
2960 /*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2961 /*              (RWorkspace: 0) */
2962
2963                 clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
2964                 i__2 = *lwork - iwork + 1;
2965                 cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
2966                         work[iwork], &i__2, &ierr);
2967             }
2968             if (wntuo) {
2969
2970 /*              If left singular vectors desired in A, generate left */
2971 /*              bidiagonalizing vectors in A */
2972 /*              (CWorkspace: need 3*N, prefer 2*N+N*NB) */
2973 /*              (RWorkspace: 0) */
2974
2975                 i__2 = *lwork - iwork + 1;
2976                 cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
2977                         iwork], &i__2, &ierr);
2978             }
2979             if (wntvo) {
2980
2981 /*              If right singular vectors desired in A, generate right */
2982 /*              bidiagonalizing vectors in A */
2983 /*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
2984 /*              (RWorkspace: 0) */
2985
2986                 i__2 = *lwork - iwork + 1;
2987                 cungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
2988                         iwork], &i__2, &ierr);
2989             }
2990             irwork = ie + *n;
2991             if (wntuas || wntuo) {
2992                 nru = *m;
2993             }
2994             if (wntun) {
2995                 nru = 0;
2996             }
2997             if (wntvas || wntvo) {
2998                 ncvt = *n;
2999             }
3000             if (wntvn) {
3001                 ncvt = 0;
3002             }
3003             if (! wntuo && ! wntvo) {
3004
3005 /*              Perform bidiagonal QR iteration, if desired, computing */
3006 /*              left singular vectors in U and computing right singular */
3007 /*              vectors in VT */
3008 /*              (CWorkspace: 0) */
3009 /*              (RWorkspace: need BDSPAC) */
3010
3011                 cbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[
3012                         vt_offset], ldvt, &u[u_offset], ldu, cdum, &c__1, &
3013                         rwork[irwork], info);
3014             } else if (! wntuo && wntvo) {
3015
3016 /*              Perform bidiagonal QR iteration, if desired, computing */
3017 /*              left singular vectors in U and computing right singular */
3018 /*              vectors in A */
3019 /*              (CWorkspace: 0) */
3020 /*              (RWorkspace: need BDSPAC) */
3021
3022                 cbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &a[
3023                         a_offset], lda, &u[u_offset], ldu, cdum, &c__1, &
3024                         rwork[irwork], info);
3025             } else {
3026
3027 /*              Perform bidiagonal QR iteration, if desired, computing */
3028 /*              left singular vectors in A and computing right singular */
3029 /*              vectors in VT */
3030 /*              (CWorkspace: 0) */
3031 /*              (RWorkspace: need BDSPAC) */
3032
3033                 cbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[
3034                         vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, &
3035                         rwork[irwork], info);
3036             }
3037
3038         }
3039
3040     } else {
3041
3042 /*        A has more columns than rows. If A has sufficiently more */
3043 /*        columns than rows, first reduce using the LQ decomposition (if */
3044 /*        sufficient workspace available) */
3045
3046         if (*n >= mnthr) {
3047
3048             if (wntvn) {
3049
3050 /*              Path 1t(N much larger than M, JOBVT='N') */
3051 /*              No right singular vectors to be computed */
3052
3053                 itau = 1;
3054                 iwork = itau + *m;
3055
3056 /*              Compute A=L*Q */
3057 /*              (CWorkspace: need 2*M, prefer M+M*NB) */
3058 /*              (RWorkspace: 0) */
3059
3060                 i__2 = *lwork - iwork + 1;
3061                 cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
3062                         i__2, &ierr);
3063
3064 /*              Zero out above L */
3065
3066                 i__2 = *m - 1;
3067                 i__3 = *m - 1;
3068                 claset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << 1) + 1]
3069                         , lda);
3070                 ie = 1;
3071                 itauq = 1;
3072                 itaup = itauq + *m;
3073                 iwork = itaup + *m;
3074
3075 /*              Bidiagonalize L in A */
3076 /*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
3077 /*              (RWorkspace: need M) */
3078
3079                 i__2 = *lwork - iwork + 1;
3080                 cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[
3081                         itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3082                 if (wntuo || wntuas) {
3083
3084 /*                 If left singular vectors desired, generate Q */
3085 /*                 (CWorkspace: need 3*M, prefer 2*M+M*NB) */
3086 /*                 (RWorkspace: 0) */
3087
3088                     i__2 = *lwork - iwork + 1;
3089                     cungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
3090                             work[iwork], &i__2, &ierr);
3091                 }
3092                 irwork = ie + *m;
3093                 nru = 0;
3094                 if (wntuo || wntuas) {
3095                     nru = *m;
3096                 }
3097
3098 /*              Perform bidiagonal QR iteration, computing left singular */
3099 /*              vectors of A in A if desired */
3100 /*              (CWorkspace: 0) */
3101 /*              (RWorkspace: need BDSPAC) */
3102
3103                 cbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &rwork[ie], cdum, &
3104                         c__1, &a[a_offset], lda, cdum, &c__1, &rwork[irwork], 
3105                         info);
3106
3107 /*              If left singular vectors desired in U, copy them there */
3108
3109                 if (wntuas) {
3110                     clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3111                 }
3112
3113             } else if (wntvo && wntun) {
3114
3115 /*              Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
3116 /*              M right singular vectors to be overwritten on A and */
3117 /*              no left singular vectors to be computed */
3118
3119                 if (*lwork >= *m * *m + *m * 3) {
3120
3121 /*                 Sufficient workspace for a fast algorithm */
3122
3123                     ir = 1;
3124 /* Computing MAX */
3125                     i__2 = wrkbl, i__3 = *lda * *n;
3126                     if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
3127
3128 /*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
3129
3130                         ldwrku = *lda;
3131                         chunk = *n;
3132                         ldwrkr = *lda;
3133                     } else /* if(complicated condition) */ {
3134 /* Computing MAX */
3135                         i__2 = wrkbl, i__3 = *lda * *n;
3136                         if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
3137
3138 /*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
3139
3140                             ldwrku = *lda;
3141                             chunk = *n;
3142                             ldwrkr = *m;
3143                         } else {
3144
3145 /*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
3146
3147                             ldwrku = *m;
3148                             chunk = (*lwork - *m * *m) / *m;
3149                             ldwrkr = *m;
3150                         }
3151                     }
3152                     itau = ir + ldwrkr * *m;
3153                     iwork = itau + *m;
3154
3155 /*                 Compute A=L*Q */
3156 /*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3157 /*                 (RWorkspace: 0) */
3158
3159                     i__2 = *lwork - iwork + 1;
3160                     cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3161                             , &i__2, &ierr);
3162
3163 /*                 Copy L to WORK(IR) and zero out above it */
3164
3165                     clacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
3166                     i__2 = *m - 1;
3167                     i__3 = *m - 1;
3168                     claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 
3169                             ldwrkr], &ldwrkr);
3170
3171 /*                 Generate Q in A */
3172 /*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3173 /*                 (RWorkspace: 0) */
3174
3175                     i__2 = *lwork - iwork + 1;
3176                     cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3177                             iwork], &i__2, &ierr);
3178                     ie = 1;
3179                     itauq = itau;
3180                     itaup = itauq + *m;
3181                     iwork = itaup + *m;
3182
3183 /*                 Bidiagonalize L in WORK(IR) */
3184 /*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
3185 /*                 (RWorkspace: need M) */
3186
3187                     i__2 = *lwork - iwork + 1;
3188                     cgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
3189                             work[itauq], &work[itaup], &work[iwork], &i__2, &
3190                             ierr);
3191
3192 /*                 Generate right vectors bidiagonalizing L */
3193 /*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) */
3194 /*                 (RWorkspace: 0) */
3195
3196                     i__2 = *lwork - iwork + 1;
3197                     cungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
3198                             work[iwork], &i__2, &ierr);
3199                     irwork = ie + *m;
3200
3201 /*                 Perform bidiagonal QR iteration, computing right */
3202 /*                 singular vectors of L in WORK(IR) */
3203 /*                 (CWorkspace: need M*M) */
3204 /*                 (RWorkspace: need BDSPAC) */
3205
3206                     cbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], &work[
3207                             ir], &ldwrkr, cdum, &c__1, cdum, &c__1, &rwork[
3208                             irwork], info);
3209                     iu = itauq;
3210
3211 /*                 Multiply right singular vectors of L in WORK(IR) by Q */
3212 /*                 in A, storing result in WORK(IU) and copying to A */
3213 /*                 (CWorkspace: need M*M+M, prefer M*M+M*N) */
3214 /*                 (RWorkspace: 0) */
3215
3216                     i__2 = *n;
3217                     i__3 = chunk;
3218                     for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
3219                              i__3) {
3220 /* Computing MIN */
3221                         i__4 = *n - i__ + 1;
3222                         blk = f2cmin(i__4,chunk);
3223                         cgemm_("N", "N", m, &blk, m, &c_b2, &work[ir], &
3224                                 ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b1, &
3225                                 work[iu], &ldwrku);
3226                         clacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
3227                                 a_dim1 + 1], lda);
3228 /* L30: */
3229                     }
3230
3231                 } else {
3232
3233 /*                 Insufficient workspace for a fast algorithm */
3234
3235                     ie = 1;
3236                     itauq = 1;
3237                     itaup = itauq + *m;
3238                     iwork = itaup + *m;
3239
3240 /*                 Bidiagonalize A */
3241 /*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) */
3242 /*                 (RWorkspace: need M) */
3243
3244                     i__3 = *lwork - iwork + 1;
3245                     cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
3246                             itauq], &work[itaup], &work[iwork], &i__3, &ierr);
3247
3248 /*                 Generate right vectors bidiagonalizing A */
3249 /*                 (CWorkspace: need 3*M, prefer 2*M+M*NB) */
3250 /*                 (RWorkspace: 0) */
3251
3252                     i__3 = *lwork - iwork + 1;
3253                     cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
3254                             work[iwork], &i__3, &ierr);
3255                     irwork = ie + *m;
3256
3257 /*                 Perform bidiagonal QR iteration, computing right */
3258 /*                 singular vectors of A in A */
3259 /*                 (CWorkspace: 0) */
3260 /*                 (RWorkspace: need BDSPAC) */
3261
3262                     cbdsqr_("L", m, n, &c__0, &c__0, &s[1], &rwork[ie], &a[
3263                             a_offset], lda, cdum, &c__1, cdum, &c__1, &rwork[
3264                             irwork], info);
3265
3266                 }
3267
3268             } else if (wntvo && wntuas) {
3269
3270 /*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */
3271 /*              M right singular vectors to be overwritten on A and */
3272 /*              M left singular vectors to be computed in U */
3273
3274                 if (*lwork >= *m * *m + *m * 3) {
3275
3276 /*                 Sufficient workspace for a fast algorithm */
3277
3278                     ir = 1;
3279 /* Computing MAX */
3280                     i__3 = wrkbl, i__2 = *lda * *n;
3281                     if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
3282
3283 /*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
3284
3285                         ldwrku = *lda;
3286                         chunk = *n;
3287                         ldwrkr = *lda;
3288                     } else /* if(complicated condition) */ {
3289 /* Computing MAX */
3290                         i__3 = wrkbl, i__2 = *lda * *n;
3291                         if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
3292
3293 /*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
3294
3295                             ldwrku = *lda;
3296                             chunk = *n;
3297                             ldwrkr = *m;
3298                         } else {
3299
3300 /*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
3301
3302                             ldwrku = *m;
3303                             chunk = (*lwork - *m * *m) / *m;
3304                             ldwrkr = *m;
3305                         }
3306                     }
3307                     itau = ir + ldwrkr * *m;
3308                     iwork = itau + *m;
3309
3310 /*                 Compute A=L*Q */
3311 /*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3312 /*                 (RWorkspace: 0) */
3313
3314                     i__3 = *lwork - iwork + 1;
3315                     cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3316                             , &i__3, &ierr);
3317
3318 /*                 Copy L to U, zeroing about above it */
3319
3320                     clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3321                     i__3 = *m - 1;
3322                     i__2 = *m - 1;
3323                     claset_("U", &i__3, &i__2, &c_b1, &c_b1, &u[(u_dim1 << 1) 
3324                             + 1], ldu);
3325
3326 /*                 Generate Q in A */
3327 /*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3328 /*                 (RWorkspace: 0) */
3329
3330                     i__3 = *lwork - iwork + 1;
3331                     cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3332                             iwork], &i__3, &ierr);
3333                     ie = 1;
3334                     itauq = itau;
3335                     itaup = itauq + *m;
3336                     iwork = itaup + *m;
3337
3338 /*                 Bidiagonalize L in U, copying result to WORK(IR) */
3339 /*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
3340 /*                 (RWorkspace: need M) */
3341
3342                     i__3 = *lwork - iwork + 1;
3343                     cgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &work[
3344                             itauq], &work[itaup], &work[iwork], &i__3, &ierr);
3345                     clacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
3346
3347 /*                 Generate right vectors bidiagonalizing L in WORK(IR) */
3348 /*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) */
3349 /*                 (RWorkspace: 0) */
3350
3351                     i__3 = *lwork - iwork + 1;
3352                     cungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
3353                             work[iwork], &i__3, &ierr);
3354
3355 /*                 Generate left vectors bidiagonalizing L in U */
3356 /*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */
3357 /*                 (RWorkspace: 0) */
3358
3359                     i__3 = *lwork - iwork + 1;
3360                     cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3361                             work[iwork], &i__3, &ierr);
3362                     irwork = ie + *m;
3363
3364 /*                 Perform bidiagonal QR iteration, computing left */
3365 /*                 singular vectors of L in U, and computing right */
3366 /*                 singular vectors of L in WORK(IR) */
3367 /*                 (CWorkspace: need M*M) */
3368 /*                 (RWorkspace: need BDSPAC) */
3369
3370                     cbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ir],
3371                              &ldwrkr, &u[u_offset], ldu, cdum, &c__1, &rwork[
3372                             irwork], info);
3373                     iu = itauq;
3374
3375 /*                 Multiply right singular vectors of L in WORK(IR) by Q */
3376 /*                 in A, storing result in WORK(IU) and copying to A */
3377 /*                 (CWorkspace: need M*M+M, prefer M*M+M*N)) */
3378 /*                 (RWorkspace: 0) */
3379
3380                     i__3 = *n;
3381                     i__2 = chunk;
3382                     for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
3383                              i__2) {
3384 /* Computing MIN */
3385                         i__4 = *n - i__ + 1;
3386                         blk = f2cmin(i__4,chunk);
3387                         cgemm_("N", "N", m, &blk, m, &c_b2, &work[ir], &
3388                                 ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b1, &
3389                                 work[iu], &ldwrku);
3390                         clacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
3391                                 a_dim1 + 1], lda);
3392 /* L40: */
3393                     }
3394
3395                 } else {
3396
3397 /*                 Insufficient workspace for a fast algorithm */
3398
3399                     itau = 1;
3400                     iwork = itau + *m;
3401
3402 /*                 Compute A=L*Q */
3403 /*                 (CWorkspace: need 2*M, prefer M+M*NB) */
3404 /*                 (RWorkspace: 0) */
3405
3406                     i__2 = *lwork - iwork + 1;
3407                     cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3408                             , &i__2, &ierr);
3409
3410 /*                 Copy L to U, zeroing out above it */
3411
3412                     clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3413                     i__2 = *m - 1;
3414                     i__3 = *m - 1;
3415                     claset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 << 1) 
3416                             + 1], ldu);
3417
3418 /*                 Generate Q in A */
3419 /*                 (CWorkspace: need 2*M, prefer M+M*NB) */
3420 /*                 (RWorkspace: 0) */
3421
3422                     i__2 = *lwork - iwork + 1;
3423                     cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3424                             iwork], &i__2, &ierr);
3425                     ie = 1;
3426                     itauq = itau;
3427                     itaup = itauq + *m;
3428                     iwork = itaup + *m;
3429
3430 /*                 Bidiagonalize L in U */
3431 /*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
3432 /*                 (RWorkspace: need M) */
3433
3434                     i__2 = *lwork - iwork + 1;
3435                     cgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &work[
3436                             itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3437
3438 /*                 Multiply right vectors bidiagonalizing L by Q in A */
3439 /*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
3440 /*                 (RWorkspace: 0) */
3441
3442                     i__2 = *lwork - iwork + 1;
3443                     cunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, &work[
3444                             itaup], &a[a_offset], lda, &work[iwork], &i__2, &
3445                             ierr);
3446
3447 /*                 Generate left vectors bidiagonalizing L in U */
3448 /*                 (CWorkspace: need 3*M, prefer 2*M+M*NB) */
3449 /*                 (RWorkspace: 0) */
3450
3451                     i__2 = *lwork - iwork + 1;
3452                     cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3453                             work[iwork], &i__2, &ierr);
3454                     irwork = ie + *m;
3455
3456 /*                 Perform bidiagonal QR iteration, computing left */
3457 /*                 singular vectors of A in U and computing right */
3458 /*                 singular vectors of A in A */
3459 /*                 (CWorkspace: 0) */
3460 /*                 (RWorkspace: need BDSPAC) */
3461
3462                     cbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &a[
3463                             a_offset], lda, &u[u_offset], ldu, cdum, &c__1, &
3464                             rwork[irwork], info);
3465
3466                 }
3467
3468             } else if (wntvs) {
3469
3470                 if (wntun) {
3471
3472 /*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
3473 /*                 M right singular vectors to be computed in VT and */
3474 /*                 no left singular vectors to be computed */
3475
3476                     if (*lwork >= *m * *m + *m * 3) {
3477
3478 /*                    Sufficient workspace for a fast algorithm */
3479
3480                         ir = 1;
3481                         if (*lwork >= wrkbl + *lda * *m) {
3482
3483 /*                       WORK(IR) is LDA by M */
3484
3485                             ldwrkr = *lda;
3486                         } else {
3487
3488 /*                       WORK(IR) is M by M */
3489
3490                             ldwrkr = *m;
3491                         }
3492                         itau = ir + ldwrkr * *m;
3493                         iwork = itau + *m;
3494
3495 /*                    Compute A=L*Q */
3496 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3497 /*                    (RWorkspace: 0) */
3498
3499                         i__2 = *lwork - iwork + 1;
3500                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3501                                 iwork], &i__2, &ierr);
3502
3503 /*                    Copy L to WORK(IR), zeroing out above it */
3504
3505                         clacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
3506                                 ldwrkr);
3507                         i__2 = *m - 1;
3508                         i__3 = *m - 1;
3509                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 
3510                                 ldwrkr], &ldwrkr);
3511
3512 /*                    Generate Q in A */
3513 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3514 /*                    (RWorkspace: 0) */
3515
3516                         i__2 = *lwork - iwork + 1;
3517                         cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3518                                 work[iwork], &i__2, &ierr);
3519                         ie = 1;
3520                         itauq = itau;
3521                         itaup = itauq + *m;
3522                         iwork = itaup + *m;
3523
3524 /*                    Bidiagonalize L in WORK(IR) */
3525 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
3526 /*                    (RWorkspace: need M) */
3527
3528                         i__2 = *lwork - iwork + 1;
3529                         cgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
3530                                 work[itauq], &work[itaup], &work[iwork], &
3531                                 i__2, &ierr);
3532
3533 /*                    Generate right vectors bidiagonalizing L in */
3534 /*                    WORK(IR) */
3535 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) */
3536 /*                    (RWorkspace: 0) */
3537
3538                         i__2 = *lwork - iwork + 1;
3539                         cungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
3540                                 , &work[iwork], &i__2, &ierr);
3541                         irwork = ie + *m;
3542
3543 /*                    Perform bidiagonal QR iteration, computing right */
3544 /*                    singular vectors of L in WORK(IR) */
3545 /*                    (CWorkspace: need M*M) */
3546 /*                    (RWorkspace: need BDSPAC) */
3547
3548                         cbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], &
3549                                 work[ir], &ldwrkr, cdum, &c__1, cdum, &c__1, &
3550                                 rwork[irwork], info);
3551
3552 /*                    Multiply right singular vectors of L in WORK(IR) by */
3553 /*                    Q in A, storing result in VT */
3554 /*                    (CWorkspace: need M*M) */
3555 /*                    (RWorkspace: 0) */
3556
3557                         cgemm_("N", "N", m, n, m, &c_b2, &work[ir], &ldwrkr, &
3558                                 a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt);
3559
3560                     } else {
3561
3562 /*                    Insufficient workspace for a fast algorithm */
3563
3564                         itau = 1;
3565                         iwork = itau + *m;
3566
3567 /*                    Compute A=L*Q */
3568 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3569 /*                    (RWorkspace: 0) */
3570
3571                         i__2 = *lwork - iwork + 1;
3572                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3573                                 iwork], &i__2, &ierr);
3574
3575 /*                    Copy result to VT */
3576
3577                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3578                                 ldvt);
3579
3580 /*                    Generate Q in VT */
3581 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3582 /*                    (RWorkspace: 0) */
3583
3584                         i__2 = *lwork - iwork + 1;
3585                         cunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3586                                 work[iwork], &i__2, &ierr);
3587                         ie = 1;
3588                         itauq = itau;
3589                         itaup = itauq + *m;
3590                         iwork = itaup + *m;
3591
3592 /*                    Zero out above L in A */
3593
3594                         i__2 = *m - 1;
3595                         i__3 = *m - 1;
3596                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 <<
3597                                  1) + 1], lda);
3598
3599 /*                    Bidiagonalize L in A */
3600 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
3601 /*                    (RWorkspace: need M) */
3602
3603                         i__2 = *lwork - iwork + 1;
3604                         cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &
3605                                 work[itauq], &work[itaup], &work[iwork], &
3606                                 i__2, &ierr);
3607
3608 /*                    Multiply right vectors bidiagonalizing L by Q in VT */
3609 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
3610 /*                    (RWorkspace: 0) */
3611
3612                         i__2 = *lwork - iwork + 1;
3613                         cunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, &
3614                                 work[itaup], &vt[vt_offset], ldvt, &work[
3615                                 iwork], &i__2, &ierr);
3616                         irwork = ie + *m;
3617
3618 /*                    Perform bidiagonal QR iteration, computing right */
3619 /*                    singular vectors of A in VT */
3620 /*                    (CWorkspace: 0) */
3621 /*                    (RWorkspace: need BDSPAC) */
3622
3623                         cbdsqr_("U", m, n, &c__0, &c__0, &s[1], &rwork[ie], &
3624                                 vt[vt_offset], ldvt, cdum, &c__1, cdum, &c__1,
3625                                  &rwork[irwork], info);
3626
3627                     }
3628
3629                 } else if (wntuo) {
3630
3631 /*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
3632 /*                 M right singular vectors to be computed in VT and */
3633 /*                 M left singular vectors to be overwritten on A */
3634
3635                     if (*lwork >= (*m << 1) * *m + *m * 3) {
3636
3637 /*                    Sufficient workspace for a fast algorithm */
3638
3639                         iu = 1;
3640                         if (*lwork >= wrkbl + (*lda << 1) * *m) {
3641
3642 /*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
3643
3644                             ldwrku = *lda;
3645                             ir = iu + ldwrku * *m;
3646                             ldwrkr = *lda;
3647                         } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
3648
3649 /*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
3650
3651                             ldwrku = *lda;
3652                             ir = iu + ldwrku * *m;
3653                             ldwrkr = *m;
3654                         } else {
3655
3656 /*                       WORK(IU) is M by M and WORK(IR) is M by M */
3657
3658                             ldwrku = *m;
3659                             ir = iu + ldwrku * *m;
3660                             ldwrkr = *m;
3661                         }
3662                         itau = ir + ldwrkr * *m;
3663                         iwork = itau + *m;
3664
3665 /*                    Compute A=L*Q */
3666 /*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3667 /*                    (RWorkspace: 0) */
3668
3669                         i__2 = *lwork - iwork + 1;
3670                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3671                                 iwork], &i__2, &ierr);
3672
3673 /*                    Copy L to WORK(IU), zeroing out below it */
3674
3675                         clacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3676                                 ldwrku);
3677                         i__2 = *m - 1;
3678                         i__3 = *m - 1;
3679                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 
3680                                 ldwrku], &ldwrku);
3681
3682 /*                    Generate Q in A */
3683 /*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3684 /*                    (RWorkspace: 0) */
3685
3686                         i__2 = *lwork - iwork + 1;
3687                         cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3688                                 work[iwork], &i__2, &ierr);
3689                         ie = 1;
3690                         itauq = itau;
3691                         itaup = itauq + *m;
3692                         iwork = itaup + *m;
3693
3694 /*                    Bidiagonalize L in WORK(IU), copying result to */
3695 /*                    WORK(IR) */
3696 /*                    (CWorkspace: need   2*M*M+3*M, */
3697 /*                                 prefer 2*M*M+2*M+2*M*NB) */
3698 /*                    (RWorkspace: need   M) */
3699
3700                         i__2 = *lwork - iwork + 1;
3701                         cgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], &
3702                                 work[itauq], &work[itaup], &work[iwork], &
3703                                 i__2, &ierr);
3704                         clacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
3705                                 ldwrkr);
3706
3707 /*                    Generate right bidiagonalizing vectors in WORK(IU) */
3708 /*                    (CWorkspace: need   2*M*M+3*M-1, */
3709 /*                                 prefer 2*M*M+2*M+(M-1)*NB) */
3710 /*                    (RWorkspace: 0) */
3711
3712                         i__2 = *lwork - iwork + 1;
3713                         cungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3714                                 , &work[iwork], &i__2, &ierr);
3715
3716 /*                    Generate left bidiagonalizing vectors in WORK(IR) */
3717 /*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */
3718 /*                    (RWorkspace: 0) */
3719
3720                         i__2 = *lwork - iwork + 1;
3721                         cungbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
3722                                 , &work[iwork], &i__2, &ierr);
3723                         irwork = ie + *m;
3724
3725 /*                    Perform bidiagonal QR iteration, computing left */
3726 /*                    singular vectors of L in WORK(IR) and computing */
3727 /*                    right singular vectors of L in WORK(IU) */
3728 /*                    (CWorkspace: need 2*M*M) */
3729 /*                    (RWorkspace: need BDSPAC) */
3730
3731                         cbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[
3732                                 iu], &ldwrku, &work[ir], &ldwrkr, cdum, &c__1,
3733                                  &rwork[irwork], info);
3734
3735 /*                    Multiply right singular vectors of L in WORK(IU) by */
3736 /*                    Q in A, storing result in VT */
3737 /*                    (CWorkspace: need M*M) */
3738 /*                    (RWorkspace: 0) */
3739
3740                         cgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, &
3741                                 a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt);
3742
3743 /*                    Copy left singular vectors of L to A */
3744 /*                    (CWorkspace: need M*M) */
3745 /*                    (RWorkspace: 0) */
3746
3747                         clacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
3748                                 lda);
3749
3750                     } else {
3751
3752 /*                    Insufficient workspace for a fast algorithm */
3753
3754                         itau = 1;
3755                         iwork = itau + *m;
3756
3757 /*                    Compute A=L*Q, copying result to VT */
3758 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3759 /*                    (RWorkspace: 0) */
3760
3761                         i__2 = *lwork - iwork + 1;
3762                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3763                                 iwork], &i__2, &ierr);
3764                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3765                                 ldvt);
3766
3767 /*                    Generate Q in VT */
3768 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3769 /*                    (RWorkspace: 0) */
3770
3771                         i__2 = *lwork - iwork + 1;
3772                         cunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3773                                 work[iwork], &i__2, &ierr);
3774                         ie = 1;
3775                         itauq = itau;
3776                         itaup = itauq + *m;
3777                         iwork = itaup + *m;
3778
3779 /*                    Zero out above L in A */
3780
3781                         i__2 = *m - 1;
3782                         i__3 = *m - 1;
3783                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 <<
3784                                  1) + 1], lda);
3785
3786 /*                    Bidiagonalize L in A */
3787 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
3788 /*                    (RWorkspace: need M) */
3789
3790                         i__2 = *lwork - iwork + 1;
3791                         cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &
3792                                 work[itauq], &work[itaup], &work[iwork], &
3793                                 i__2, &ierr);
3794
3795 /*                    Multiply right vectors bidiagonalizing L by Q in VT */
3796 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
3797 /*                    (RWorkspace: 0) */
3798
3799                         i__2 = *lwork - iwork + 1;
3800                         cunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, &
3801                                 work[itaup], &vt[vt_offset], ldvt, &work[
3802                                 iwork], &i__2, &ierr);
3803
3804 /*                    Generate left bidiagonalizing vectors of L in A */
3805 /*                    (CWorkspace: need 3*M, prefer 2*M+M*NB) */
3806 /*                    (RWorkspace: 0) */
3807
3808                         i__2 = *lwork - iwork + 1;
3809                         cungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
3810                                  &work[iwork], &i__2, &ierr);
3811                         irwork = ie + *m;
3812
3813 /*                    Perform bidiagonal QR iteration, computing left */
3814 /*                    singular vectors of A in A and computing right */
3815 /*                    singular vectors of A in VT */
3816 /*                    (CWorkspace: 0) */
3817 /*                    (RWorkspace: need BDSPAC) */
3818
3819                         cbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[
3820                                 vt_offset], ldvt, &a[a_offset], lda, cdum, &
3821                                 c__1, &rwork[irwork], info);
3822
3823                     }
3824
3825                 } else if (wntuas) {
3826
3827 /*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
3828 /*                         JOBVT='S') */
3829 /*                 M right singular vectors to be computed in VT and */
3830 /*                 M left singular vectors to be computed in U */
3831
3832                     if (*lwork >= *m * *m + *m * 3) {
3833
3834 /*                    Sufficient workspace for a fast algorithm */
3835
3836                         iu = 1;
3837                         if (*lwork >= wrkbl + *lda * *m) {
3838
3839 /*                       WORK(IU) is LDA by N */
3840
3841                             ldwrku = *lda;
3842                         } else {
3843
3844 /*                       WORK(IU) is LDA by M */
3845
3846                             ldwrku = *m;
3847                         }
3848                         itau = iu + ldwrku * *m;
3849                         iwork = itau + *m;
3850
3851 /*                    Compute A=L*Q */
3852 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3853 /*                    (RWorkspace: 0) */
3854
3855                         i__2 = *lwork - iwork + 1;
3856                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3857                                 iwork], &i__2, &ierr);
3858
3859 /*                    Copy L to WORK(IU), zeroing out above it */
3860
3861                         clacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3862                                 ldwrku);
3863                         i__2 = *m - 1;
3864                         i__3 = *m - 1;
3865                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 
3866                                 ldwrku], &ldwrku);
3867
3868 /*                    Generate Q in A */
3869 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
3870 /*                    (RWorkspace: 0) */
3871
3872                         i__2 = *lwork - iwork + 1;
3873                         cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3874                                 work[iwork], &i__2, &ierr);
3875                         ie = 1;
3876                         itauq = itau;
3877                         itaup = itauq + *m;
3878                         iwork = itaup + *m;
3879
3880 /*                    Bidiagonalize L in WORK(IU), copying result to U */
3881 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
3882 /*                    (RWorkspace: need M) */
3883
3884                         i__2 = *lwork - iwork + 1;
3885                         cgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], &
3886                                 work[itauq], &work[itaup], &work[iwork], &
3887                                 i__2, &ierr);
3888                         clacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
3889                                 ldu);
3890
3891 /*                    Generate right bidiagonalizing vectors in WORK(IU) */
3892 /*                    (CWorkspace: need   M*M+3*M-1, */
3893 /*                                 prefer M*M+2*M+(M-1)*NB) */
3894 /*                    (RWorkspace: 0) */
3895
3896                         i__2 = *lwork - iwork + 1;
3897                         cungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3898                                 , &work[iwork], &i__2, &ierr);
3899
3900 /*                    Generate left bidiagonalizing vectors in U */
3901 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */
3902 /*                    (RWorkspace: 0) */
3903
3904                         i__2 = *lwork - iwork + 1;
3905                         cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3906                                  &work[iwork], &i__2, &ierr);
3907                         irwork = ie + *m;
3908
3909 /*                    Perform bidiagonal QR iteration, computing left */
3910 /*                    singular vectors of L in U and computing right */
3911 /*                    singular vectors of L in WORK(IU) */
3912 /*                    (CWorkspace: need M*M) */
3913 /*                    (RWorkspace: need BDSPAC) */
3914
3915                         cbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[
3916                                 iu], &ldwrku, &u[u_offset], ldu, cdum, &c__1, 
3917                                 &rwork[irwork], info);
3918
3919 /*                    Multiply right singular vectors of L in WORK(IU) by */
3920 /*                    Q in A, storing result in VT */
3921 /*                    (CWorkspace: need M*M) */
3922 /*                    (RWorkspace: 0) */
3923
3924                         cgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, &
3925                                 a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt);
3926
3927                     } else {
3928
3929 /*                    Insufficient workspace for a fast algorithm */
3930
3931                         itau = 1;
3932                         iwork = itau + *m;
3933
3934 /*                    Compute A=L*Q, copying result to VT */
3935 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3936 /*                    (RWorkspace: 0) */
3937
3938                         i__2 = *lwork - iwork + 1;
3939                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3940                                 iwork], &i__2, &ierr);
3941                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3942                                 ldvt);
3943
3944 /*                    Generate Q in VT */
3945 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
3946 /*                    (RWorkspace: 0) */
3947
3948                         i__2 = *lwork - iwork + 1;
3949                         cunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3950                                 work[iwork], &i__2, &ierr);
3951
3952 /*                    Copy L to U, zeroing out above it */
3953
3954                         clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
3955                                 ldu);
3956                         i__2 = *m - 1;
3957                         i__3 = *m - 1;
3958                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 <<
3959                                  1) + 1], ldu);
3960                         ie = 1;
3961                         itauq = itau;
3962                         itaup = itauq + *m;
3963                         iwork = itaup + *m;
3964
3965 /*                    Bidiagonalize L in U */
3966 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
3967 /*                    (RWorkspace: need M) */
3968
3969                         i__2 = *lwork - iwork + 1;
3970                         cgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &
3971                                 work[itauq], &work[itaup], &work[iwork], &
3972                                 i__2, &ierr);
3973
3974 /*                    Multiply right bidiagonalizing vectors in U by Q */
3975 /*                    in VT */
3976 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
3977 /*                    (RWorkspace: 0) */
3978
3979                         i__2 = *lwork - iwork + 1;
3980                         cunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, &
3981                                 work[itaup], &vt[vt_offset], ldvt, &work[
3982                                 iwork], &i__2, &ierr);
3983
3984 /*                    Generate left bidiagonalizing vectors in U */
3985 /*                    (CWorkspace: need 3*M, prefer 2*M+M*NB) */
3986 /*                    (RWorkspace: 0) */
3987
3988                         i__2 = *lwork - iwork + 1;
3989                         cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3990                                  &work[iwork], &i__2, &ierr);
3991                         irwork = ie + *m;
3992
3993 /*                    Perform bidiagonal QR iteration, computing left */
3994 /*                    singular vectors of A in U and computing right */
3995 /*                    singular vectors of A in VT */
3996 /*                    (CWorkspace: 0) */
3997 /*                    (RWorkspace: need BDSPAC) */
3998
3999                         cbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[
4000                                 vt_offset], ldvt, &u[u_offset], ldu, cdum, &
4001                                 c__1, &rwork[irwork], info);
4002
4003                     }
4004
4005                 }
4006
4007             } else if (wntva) {
4008
4009                 if (wntun) {
4010
4011 /*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
4012 /*                 N right singular vectors to be computed in VT and */
4013 /*                 no left singular vectors to be computed */
4014
4015 /* Computing MAX */
4016                     i__2 = *n + *m, i__3 = *m * 3;
4017                     if (*lwork >= *m * *m + f2cmax(i__2,i__3)) {
4018
4019 /*                    Sufficient workspace for a fast algorithm */
4020
4021                         ir = 1;
4022                         if (*lwork >= wrkbl + *lda * *m) {
4023
4024 /*                       WORK(IR) is LDA by M */
4025
4026                             ldwrkr = *lda;
4027                         } else {
4028
4029 /*                       WORK(IR) is M by M */
4030
4031                             ldwrkr = *m;
4032                         }
4033                         itau = ir + ldwrkr * *m;
4034                         iwork = itau + *m;
4035
4036 /*                    Compute A=L*Q, copying result to VT */
4037 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
4038 /*                    (RWorkspace: 0) */
4039
4040                         i__2 = *lwork - iwork + 1;
4041                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4042                                 iwork], &i__2, &ierr);
4043                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4044                                 ldvt);
4045
4046 /*                    Copy L to WORK(IR), zeroing out above it */
4047
4048                         clacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
4049                                 ldwrkr);
4050                         i__2 = *m - 1;
4051                         i__3 = *m - 1;
4052                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 
4053                                 ldwrkr], &ldwrkr);
4054
4055 /*                    Generate Q in VT */
4056 /*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) */
4057 /*                    (RWorkspace: 0) */
4058
4059                         i__2 = *lwork - iwork + 1;
4060                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4061                                 work[iwork], &i__2, &ierr);
4062                         ie = 1;
4063                         itauq = itau;
4064                         itaup = itauq + *m;
4065                         iwork = itaup + *m;
4066
4067 /*                    Bidiagonalize L in WORK(IR) */
4068 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
4069 /*                    (RWorkspace: need M) */
4070
4071                         i__2 = *lwork - iwork + 1;
4072                         cgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], &
4073                                 work[itauq], &work[itaup], &work[iwork], &
4074                                 i__2, &ierr);
4075
4076 /*                    Generate right bidiagonalizing vectors in WORK(IR) */
4077 /*                    (CWorkspace: need   M*M+3*M-1, */
4078 /*                                 prefer M*M+2*M+(M-1)*NB) */
4079 /*                    (RWorkspace: 0) */
4080
4081                         i__2 = *lwork - iwork + 1;
4082                         cungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
4083                                 , &work[iwork], &i__2, &ierr);
4084                         irwork = ie + *m;
4085
4086 /*                    Perform bidiagonal QR iteration, computing right */
4087 /*                    singular vectors of L in WORK(IR) */
4088 /*                    (CWorkspace: need M*M) */
4089 /*                    (RWorkspace: need BDSPAC) */
4090
4091                         cbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], &
4092                                 work[ir], &ldwrkr, cdum, &c__1, cdum, &c__1, &
4093                                 rwork[irwork], info);
4094
4095 /*                    Multiply right singular vectors of L in WORK(IR) by */
4096 /*                    Q in VT, storing result in A */
4097 /*                    (CWorkspace: need M*M) */
4098 /*                    (RWorkspace: 0) */
4099
4100                         cgemm_("N", "N", m, n, m, &c_b2, &work[ir], &ldwrkr, &
4101                                 vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda);
4102
4103 /*                    Copy right singular vectors of A from A to VT */
4104
4105                         clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
4106                                 ldvt);
4107
4108                     } else {
4109
4110 /*                    Insufficient workspace for a fast algorithm */
4111
4112                         itau = 1;
4113                         iwork = itau + *m;
4114
4115 /*                    Compute A=L*Q, copying result to VT */
4116 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
4117 /*                    (RWorkspace: 0) */
4118
4119                         i__2 = *lwork - iwork + 1;
4120                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4121                                 iwork], &i__2, &ierr);
4122                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4123                                 ldvt);
4124
4125 /*                    Generate Q in VT */
4126 /*                    (CWorkspace: need M+N, prefer M+N*NB) */
4127 /*                    (RWorkspace: 0) */
4128
4129                         i__2 = *lwork - iwork + 1;
4130                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4131                                 work[iwork], &i__2, &ierr);
4132                         ie = 1;
4133                         itauq = itau;
4134                         itaup = itauq + *m;
4135                         iwork = itaup + *m;
4136
4137 /*                    Zero out above L in A */
4138
4139                         i__2 = *m - 1;
4140                         i__3 = *m - 1;
4141                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 <<
4142                                  1) + 1], lda);
4143
4144 /*                    Bidiagonalize L in A */
4145 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
4146 /*                    (RWorkspace: need M) */
4147
4148                         i__2 = *lwork - iwork + 1;
4149                         cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &
4150                                 work[itauq], &work[itaup], &work[iwork], &
4151                                 i__2, &ierr);
4152
4153 /*                    Multiply right bidiagonalizing vectors in A by Q */
4154 /*                    in VT */
4155 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
4156 /*                    (RWorkspace: 0) */
4157
4158                         i__2 = *lwork - iwork + 1;
4159                         cunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, &
4160                                 work[itaup], &vt[vt_offset], ldvt, &work[
4161                                 iwork], &i__2, &ierr);
4162                         irwork = ie + *m;
4163
4164 /*                    Perform bidiagonal QR iteration, computing right */
4165 /*                    singular vectors of A in VT */
4166 /*                    (CWorkspace: 0) */
4167 /*                    (RWorkspace: need BDSPAC) */
4168
4169                         cbdsqr_("U", m, n, &c__0, &c__0, &s[1], &rwork[ie], &
4170                                 vt[vt_offset], ldvt, cdum, &c__1, cdum, &c__1,
4171                                  &rwork[irwork], info);
4172
4173                     }
4174
4175                 } else if (wntuo) {
4176
4177 /*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
4178 /*                 N right singular vectors to be computed in VT and */
4179 /*                 M left singular vectors to be overwritten on A */
4180
4181 /* Computing MAX */
4182                     i__2 = *n + *m, i__3 = *m * 3;
4183                     if (*lwork >= (*m << 1) * *m + f2cmax(i__2,i__3)) {
4184
4185 /*                    Sufficient workspace for a fast algorithm */
4186
4187                         iu = 1;
4188                         if (*lwork >= wrkbl + (*lda << 1) * *m) {
4189
4190 /*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
4191
4192                             ldwrku = *lda;
4193                             ir = iu + ldwrku * *m;
4194                             ldwrkr = *lda;
4195                         } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
4196
4197 /*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
4198
4199                             ldwrku = *lda;
4200                             ir = iu + ldwrku * *m;
4201                             ldwrkr = *m;
4202                         } else {
4203
4204 /*                       WORK(IU) is M by M and WORK(IR) is M by M */
4205
4206                             ldwrku = *m;
4207                             ir = iu + ldwrku * *m;
4208                             ldwrkr = *m;
4209                         }
4210                         itau = ir + ldwrkr * *m;
4211                         iwork = itau + *m;
4212
4213 /*                    Compute A=L*Q, copying result to VT */
4214 /*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
4215 /*                    (RWorkspace: 0) */
4216
4217                         i__2 = *lwork - iwork + 1;
4218                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4219                                 iwork], &i__2, &ierr);
4220                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4221                                 ldvt);
4222
4223 /*                    Generate Q in VT */
4224 /*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */
4225 /*                    (RWorkspace: 0) */
4226
4227                         i__2 = *lwork - iwork + 1;
4228                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4229                                 work[iwork], &i__2, &ierr);
4230
4231 /*                    Copy L to WORK(IU), zeroing out above it */
4232
4233                         clacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
4234                                 ldwrku);
4235                         i__2 = *m - 1;
4236                         i__3 = *m - 1;
4237                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 
4238                                 ldwrku], &ldwrku);
4239                         ie = 1;
4240                         itauq = itau;
4241                         itaup = itauq + *m;
4242                         iwork = itaup + *m;
4243
4244 /*                    Bidiagonalize L in WORK(IU), copying result to */
4245 /*                    WORK(IR) */
4246 /*                    (CWorkspace: need   2*M*M+3*M, */
4247 /*                                 prefer 2*M*M+2*M+2*M*NB) */
4248 /*                    (RWorkspace: need   M) */
4249
4250                         i__2 = *lwork - iwork + 1;
4251                         cgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], &
4252                                 work[itauq], &work[itaup], &work[iwork], &
4253                                 i__2, &ierr);
4254                         clacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
4255                                 ldwrkr);
4256
4257 /*                    Generate right bidiagonalizing vectors in WORK(IU) */
4258 /*                    (CWorkspace: need   2*M*M+3*M-1, */
4259 /*                                 prefer 2*M*M+2*M+(M-1)*NB) */
4260 /*                    (RWorkspace: 0) */
4261
4262                         i__2 = *lwork - iwork + 1;
4263                         cungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
4264                                 , &work[iwork], &i__2, &ierr);
4265
4266 /*                    Generate left bidiagonalizing vectors in WORK(IR) */
4267 /*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */
4268 /*                    (RWorkspace: 0) */
4269
4270                         i__2 = *lwork - iwork + 1;
4271                         cungbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
4272                                 , &work[iwork], &i__2, &ierr);
4273                         irwork = ie + *m;
4274
4275 /*                    Perform bidiagonal QR iteration, computing left */
4276 /*                    singular vectors of L in WORK(IR) and computing */
4277 /*                    right singular vectors of L in WORK(IU) */
4278 /*                    (CWorkspace: need 2*M*M) */
4279 /*                    (RWorkspace: need BDSPAC) */
4280
4281                         cbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[
4282                                 iu], &ldwrku, &work[ir], &ldwrkr, cdum, &c__1,
4283                                  &rwork[irwork], info);
4284
4285 /*                    Multiply right singular vectors of L in WORK(IU) by */
4286 /*                    Q in VT, storing result in A */
4287 /*                    (CWorkspace: need M*M) */
4288 /*                    (RWorkspace: 0) */
4289
4290                         cgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, &
4291                                 vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda);
4292
4293 /*                    Copy right singular vectors of A from A to VT */
4294
4295                         clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
4296                                 ldvt);
4297
4298 /*                    Copy left singular vectors of A from WORK(IR) to A */
4299
4300                         clacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
4301                                 lda);
4302
4303                     } else {
4304
4305 /*                    Insufficient workspace for a fast algorithm */
4306
4307                         itau = 1;
4308                         iwork = itau + *m;
4309
4310 /*                    Compute A=L*Q, copying result to VT */
4311 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
4312 /*                    (RWorkspace: 0) */
4313
4314                         i__2 = *lwork - iwork + 1;
4315                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4316                                 iwork], &i__2, &ierr);
4317                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4318                                 ldvt);
4319
4320 /*                    Generate Q in VT */
4321 /*                    (CWorkspace: need M+N, prefer M+N*NB) */
4322 /*                    (RWorkspace: 0) */
4323
4324                         i__2 = *lwork - iwork + 1;
4325                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4326                                 work[iwork], &i__2, &ierr);
4327                         ie = 1;
4328                         itauq = itau;
4329                         itaup = itauq + *m;
4330                         iwork = itaup + *m;
4331
4332 /*                    Zero out above L in A */
4333
4334                         i__2 = *m - 1;
4335                         i__3 = *m - 1;
4336                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 <<
4337                                  1) + 1], lda);
4338
4339 /*                    Bidiagonalize L in A */
4340 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
4341 /*                    (RWorkspace: need M) */
4342
4343                         i__2 = *lwork - iwork + 1;
4344                         cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &
4345                                 work[itauq], &work[itaup], &work[iwork], &
4346                                 i__2, &ierr);
4347
4348 /*                    Multiply right bidiagonalizing vectors in A by Q */
4349 /*                    in VT */
4350 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
4351 /*                    (RWorkspace: 0) */
4352
4353                         i__2 = *lwork - iwork + 1;
4354                         cunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, &
4355                                 work[itaup], &vt[vt_offset], ldvt, &work[
4356                                 iwork], &i__2, &ierr);
4357
4358 /*                    Generate left bidiagonalizing vectors in A */
4359 /*                    (CWorkspace: need 3*M, prefer 2*M+M*NB) */
4360 /*                    (RWorkspace: 0) */
4361
4362                         i__2 = *lwork - iwork + 1;
4363                         cungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
4364                                  &work[iwork], &i__2, &ierr);
4365                         irwork = ie + *m;
4366
4367 /*                    Perform bidiagonal QR iteration, computing left */
4368 /*                    singular vectors of A in A and computing right */
4369 /*                    singular vectors of A in VT */
4370 /*                    (CWorkspace: 0) */
4371 /*                    (RWorkspace: need BDSPAC) */
4372
4373                         cbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[
4374                                 vt_offset], ldvt, &a[a_offset], lda, cdum, &
4375                                 c__1, &rwork[irwork], info);
4376
4377                     }
4378
4379                 } else if (wntuas) {
4380
4381 /*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
4382 /*                         JOBVT='A') */
4383 /*                 N right singular vectors to be computed in VT and */
4384 /*                 M left singular vectors to be computed in U */
4385
4386 /* Computing MAX */
4387                     i__2 = *n + *m, i__3 = *m * 3;
4388                     if (*lwork >= *m * *m + f2cmax(i__2,i__3)) {
4389
4390 /*                    Sufficient workspace for a fast algorithm */
4391
4392                         iu = 1;
4393                         if (*lwork >= wrkbl + *lda * *m) {
4394
4395 /*                       WORK(IU) is LDA by M */
4396
4397                             ldwrku = *lda;
4398                         } else {
4399
4400 /*                       WORK(IU) is M by M */
4401
4402                             ldwrku = *m;
4403                         }
4404                         itau = iu + ldwrku * *m;
4405                         iwork = itau + *m;
4406
4407 /*                    Compute A=L*Q, copying result to VT */
4408 /*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */
4409 /*                    (RWorkspace: 0) */
4410
4411                         i__2 = *lwork - iwork + 1;
4412                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4413                                 iwork], &i__2, &ierr);
4414                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4415                                 ldvt);
4416
4417 /*                    Generate Q in VT */
4418 /*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) */
4419 /*                    (RWorkspace: 0) */
4420
4421                         i__2 = *lwork - iwork + 1;
4422                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4423                                 work[iwork], &i__2, &ierr);
4424
4425 /*                    Copy L to WORK(IU), zeroing out above it */
4426
4427                         clacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
4428                                 ldwrku);
4429                         i__2 = *m - 1;
4430                         i__3 = *m - 1;
4431                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 
4432                                 ldwrku], &ldwrku);
4433                         ie = 1;
4434                         itauq = itau;
4435                         itaup = itauq + *m;
4436                         iwork = itaup + *m;
4437
4438 /*                    Bidiagonalize L in WORK(IU), copying result to U */
4439 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */
4440 /*                    (RWorkspace: need M) */
4441
4442                         i__2 = *lwork - iwork + 1;
4443                         cgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], &
4444                                 work[itauq], &work[itaup], &work[iwork], &
4445                                 i__2, &ierr);
4446                         clacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
4447                                 ldu);
4448
4449 /*                    Generate right bidiagonalizing vectors in WORK(IU) */
4450 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) */
4451 /*                    (RWorkspace: 0) */
4452
4453                         i__2 = *lwork - iwork + 1;
4454                         cungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
4455                                 , &work[iwork], &i__2, &ierr);
4456
4457 /*                    Generate left bidiagonalizing vectors in U */
4458 /*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */
4459 /*                    (RWorkspace: 0) */
4460
4461                         i__2 = *lwork - iwork + 1;
4462                         cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4463                                  &work[iwork], &i__2, &ierr);
4464                         irwork = ie + *m;
4465
4466 /*                    Perform bidiagonal QR iteration, computing left */
4467 /*                    singular vectors of L in U and computing right */
4468 /*                    singular vectors of L in WORK(IU) */
4469 /*                    (CWorkspace: need M*M) */
4470 /*                    (RWorkspace: need BDSPAC) */
4471
4472                         cbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[
4473                                 iu], &ldwrku, &u[u_offset], ldu, cdum, &c__1, 
4474                                 &rwork[irwork], info);
4475
4476 /*                    Multiply right singular vectors of L in WORK(IU) by */
4477 /*                    Q in VT, storing result in A */
4478 /*                    (CWorkspace: need M*M) */
4479 /*                    (RWorkspace: 0) */
4480
4481                         cgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, &
4482                                 vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda);
4483
4484 /*                    Copy right singular vectors of A from A to VT */
4485
4486                         clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
4487                                 ldvt);
4488
4489                     } else {
4490
4491 /*                    Insufficient workspace for a fast algorithm */
4492
4493                         itau = 1;
4494                         iwork = itau + *m;
4495
4496 /*                    Compute A=L*Q, copying result to VT */
4497 /*                    (CWorkspace: need 2*M, prefer M+M*NB) */
4498 /*                    (RWorkspace: 0) */
4499
4500                         i__2 = *lwork - iwork + 1;
4501                         cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4502                                 iwork], &i__2, &ierr);
4503                         clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4504                                 ldvt);
4505
4506 /*                    Generate Q in VT */
4507 /*                    (CWorkspace: need M+N, prefer M+N*NB) */
4508 /*                    (RWorkspace: 0) */
4509
4510                         i__2 = *lwork - iwork + 1;
4511                         cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4512                                 work[iwork], &i__2, &ierr);
4513
4514 /*                    Copy L to U, zeroing out above it */
4515
4516                         clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
4517                                 ldu);
4518                         i__2 = *m - 1;
4519                         i__3 = *m - 1;
4520                         claset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 <<
4521                                  1) + 1], ldu);
4522                         ie = 1;
4523                         itauq = itau;
4524                         itaup = itauq + *m;
4525                         iwork = itaup + *m;
4526
4527 /*                    Bidiagonalize L in U */
4528 /*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */
4529 /*                    (RWorkspace: need M) */
4530
4531                         i__2 = *lwork - iwork + 1;
4532                         cgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &
4533                                 work[itauq], &work[itaup], &work[iwork], &
4534                                 i__2, &ierr);
4535
4536 /*                    Multiply right bidiagonalizing vectors in U by Q */
4537 /*                    in VT */
4538 /*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */
4539 /*                    (RWorkspace: 0) */
4540
4541                         i__2 = *lwork - iwork + 1;
4542                         cunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, &
4543                                 work[itaup], &vt[vt_offset], ldvt, &work[
4544                                 iwork], &i__2, &ierr);
4545
4546 /*                    Generate left bidiagonalizing vectors in U */
4547 /*                    (CWorkspace: need 3*M, prefer 2*M+M*NB) */
4548 /*                    (RWorkspace: 0) */
4549
4550                         i__2 = *lwork - iwork + 1;
4551                         cungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4552                                  &work[iwork], &i__2, &ierr);
4553                         irwork = ie + *m;
4554
4555 /*                    Perform bidiagonal QR iteration, computing left */
4556 /*                    singular vectors of A in U and computing right */
4557 /*                    singular vectors of A in VT */
4558 /*                    (CWorkspace: 0) */
4559 /*                    (RWorkspace: need BDSPAC) */
4560
4561                         cbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[
4562                                 vt_offset], ldvt, &u[u_offset], ldu, cdum, &
4563                                 c__1, &rwork[irwork], info);
4564
4565                     }
4566
4567                 }
4568
4569             }
4570
4571         } else {
4572
4573 /*           N .LT. MNTHR */
4574
4575 /*           Path 10t(N greater than M, but not much larger) */
4576 /*           Reduce to bidiagonal form without LQ decomposition */
4577
4578             ie = 1;
4579             itauq = 1;
4580             itaup = itauq + *m;
4581             iwork = itaup + *m;
4582
4583 /*           Bidiagonalize A */
4584 /*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) */
4585 /*           (RWorkspace: M) */
4586
4587             i__2 = *lwork - iwork + 1;
4588             cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], 
4589                     &work[itaup], &work[iwork], &i__2, &ierr);
4590             if (wntuas) {
4591
4592 /*              If left singular vectors desired in U, copy result to U */
4593 /*              and generate left bidiagonalizing vectors in U */
4594 /*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) */
4595 /*              (RWorkspace: 0) */
4596
4597                 clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
4598                 i__2 = *lwork - iwork + 1;
4599                 cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
4600                         iwork], &i__2, &ierr);
4601             }
4602             if (wntvas) {
4603
4604 /*              If right singular vectors desired in VT, copy result to */
4605 /*              VT and generate right bidiagonalizing vectors in VT */
4606 /*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) */
4607 /*              (RWorkspace: 0) */
4608
4609                 clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
4610                 if (wntva) {
4611                     nrvt = *n;
4612                 }
4613                 if (wntvs) {
4614                     nrvt = *m;
4615                 }
4616                 i__2 = *lwork - iwork + 1;
4617                 cungbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 
4618                         &work[iwork], &i__2, &ierr);
4619             }
4620             if (wntuo) {
4621
4622 /*              If left singular vectors desired in A, generate left */
4623 /*              bidiagonalizing vectors in A */
4624 /*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) */
4625 /*              (RWorkspace: 0) */
4626
4627                 i__2 = *lwork - iwork + 1;
4628                 cungbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
4629                         iwork], &i__2, &ierr);
4630             }
4631             if (wntvo) {
4632
4633 /*              If right singular vectors desired in A, generate right */
4634 /*              bidiagonalizing vectors in A */
4635 /*              (CWorkspace: need 3*M, prefer 2*M+M*NB) */
4636 /*              (RWorkspace: 0) */
4637
4638                 i__2 = *lwork - iwork + 1;
4639                 cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
4640                         iwork], &i__2, &ierr);
4641             }
4642             irwork = ie + *m;
4643             if (wntuas || wntuo) {
4644                 nru = *m;
4645             }
4646             if (wntun) {
4647                 nru = 0;
4648             }
4649             if (wntvas || wntvo) {
4650                 ncvt = *n;
4651             }
4652             if (wntvn) {
4653                 ncvt = 0;
4654             }
4655             if (! wntuo && ! wntvo) {
4656
4657 /*              Perform bidiagonal QR iteration, if desired, computing */
4658 /*              left singular vectors in U and computing right singular */
4659 /*              vectors in VT */
4660 /*              (CWorkspace: 0) */
4661 /*              (RWorkspace: need BDSPAC) */
4662
4663                 cbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[
4664                         vt_offset], ldvt, &u[u_offset], ldu, cdum, &c__1, &
4665                         rwork[irwork], info);
4666             } else if (! wntuo && wntvo) {
4667
4668 /*              Perform bidiagonal QR iteration, if desired, computing */
4669 /*              left singular vectors in U and computing right singular */
4670 /*              vectors in A */
4671 /*              (CWorkspace: 0) */
4672 /*              (RWorkspace: need BDSPAC) */
4673
4674                 cbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &a[
4675                         a_offset], lda, &u[u_offset], ldu, cdum, &c__1, &
4676                         rwork[irwork], info);
4677             } else {
4678
4679 /*              Perform bidiagonal QR iteration, if desired, computing */
4680 /*              left singular vectors in A and computing right singular */
4681 /*              vectors in VT */
4682 /*              (CWorkspace: 0) */
4683 /*              (RWorkspace: need BDSPAC) */
4684
4685                 cbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[
4686                         vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, &
4687                         rwork[irwork], info);
4688             }
4689
4690         }
4691
4692     }
4693
4694 /*     Undo scaling if necessary */
4695
4696     if (iscl == 1) {
4697         if (anrm > bignum) {
4698             slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
4699                     minmn, &ierr);
4700         }
4701         if (*info != 0 && anrm > bignum) {
4702             i__2 = minmn - 1;
4703             slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &rwork[
4704                     ie], &minmn, &ierr);
4705         }
4706         if (anrm < smlnum) {
4707             slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
4708                     minmn, &ierr);
4709         }
4710         if (*info != 0 && anrm < smlnum) {
4711             i__2 = minmn - 1;
4712             slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &rwork[
4713                     ie], &minmn, &ierr);
4714         }
4715     }
4716
4717 /*     Return optimal workspace in WORK(1) */
4718
4719     work[1].r = (real) maxwrk, work[1].i = 0.f;
4720
4721     return 0;
4722
4723 /*     End of CGESVD */
4724
4725 } /* cgesvd_ */
4726