14 typedef long long BLASLONG;
15 typedef unsigned long long BLASULONG;
17 typedef long BLASLONG;
18 typedef unsigned long BLASULONG;
22 typedef BLASLONG blasint;
24 #define blasabs(x) llabs(x)
26 #define blasabs(x) labs(x)
30 #define blasabs(x) abs(x)
33 typedef blasint integer;
35 typedef unsigned int uinteger;
36 typedef char *address;
37 typedef short int shortint;
39 typedef double doublereal;
40 typedef struct { real r, i; } complex;
41 typedef struct { doublereal r, i; } doublecomplex;
43 static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
44 static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
45 static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
46 static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
48 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
49 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
50 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
51 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
53 #define pCf(z) (*_pCf(z))
54 #define pCd(z) (*_pCd(z))
56 typedef short int shortlogical;
57 typedef char logical1;
58 typedef char integer1;
63 /* Extern is for use with -E */
74 /*external read, write*/
83 /*internal read, write*/
113 /*rewind, backspace, endfile*/
125 ftnint *inex; /*parameters in standard's order*/
151 union Multitype { /* for multiple entry points */
162 typedef union Multitype Multitype;
164 struct Vardesc { /* for Namelist */
170 typedef struct Vardesc Vardesc;
177 typedef struct Namelist Namelist;
179 #define abs(x) ((x) >= 0 ? (x) : -(x))
180 #define dabs(x) (fabs(x))
181 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
182 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
183 #define dmin(a,b) (f2cmin(a,b))
184 #define dmax(a,b) (f2cmax(a,b))
185 #define bit_test(a,b) ((a) >> (b) & 1)
186 #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
187 #define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
189 #define abort_() { sig_die("Fortran abort routine called", 1); }
190 #define c_abs(z) (cabsf(Cf(z)))
191 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
193 #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
194 #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
196 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
197 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
199 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
200 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
201 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
202 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
203 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
204 #define d_abs(x) (fabs(*(x)))
205 #define d_acos(x) (acos(*(x)))
206 #define d_asin(x) (asin(*(x)))
207 #define d_atan(x) (atan(*(x)))
208 #define d_atn2(x, y) (atan2(*(x),*(y)))
209 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
210 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
211 #define d_cos(x) (cos(*(x)))
212 #define d_cosh(x) (cosh(*(x)))
213 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
214 #define d_exp(x) (exp(*(x)))
215 #define d_imag(z) (cimag(Cd(z)))
216 #define r_imag(z) (cimagf(Cf(z)))
217 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
218 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
219 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
220 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
221 #define d_log(x) (log(*(x)))
222 #define d_mod(x, y) (fmod(*(x), *(y)))
223 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
224 #define d_nint(x) u_nint(*(x))
225 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
226 #define d_sign(a,b) u_sign(*(a),*(b))
227 #define r_sign(a,b) u_sign(*(a),*(b))
228 #define d_sin(x) (sin(*(x)))
229 #define d_sinh(x) (sinh(*(x)))
230 #define d_sqrt(x) (sqrt(*(x)))
231 #define d_tan(x) (tan(*(x)))
232 #define d_tanh(x) (tanh(*(x)))
233 #define i_abs(x) abs(*(x))
234 #define i_dnnt(x) ((integer)u_nint(*(x)))
235 #define i_len(s, n) (n)
236 #define i_nint(x) ((integer)u_nint(*(x)))
237 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
238 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
239 #define pow_si(B,E) spow_ui(*(B),*(E))
240 #define pow_ri(B,E) spow_ui(*(B),*(E))
241 #define pow_di(B,E) dpow_ui(*(B),*(E))
242 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
243 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
244 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
245 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
246 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
247 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
248 #define sig_die(s, kill) { exit(1); }
249 #define s_stop(s, n) {exit(0);}
250 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
251 #define z_abs(z) (cabs(Cd(z)))
252 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
253 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
254 #define myexit_() break;
255 #define mycycle() continue;
256 #define myceiling(w) {ceil(w)}
257 #define myhuge(w) {HUGE_VAL}
258 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
259 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
261 /* procedure parameter types for -A and -C++ */
263 #define F2C_proc_par_types 1
265 typedef logical (*L_fp)(...);
267 typedef logical (*L_fp)();
270 static float spow_ui(float x, integer n) {
271 float pow=1.0; unsigned long int u;
273 if(n < 0) n = -n, x = 1/x;
282 static double dpow_ui(double x, integer n) {
283 double pow=1.0; unsigned long int u;
285 if(n < 0) n = -n, x = 1/x;
295 static _Fcomplex cpow_ui(complex x, integer n) {
296 complex pow={1.0,0.0}; unsigned long int u;
298 if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
300 if(u & 01) pow.r *= x.r, pow.i *= x.i;
301 if(u >>= 1) x.r *= x.r, x.i *= x.i;
305 _Fcomplex p={pow.r, pow.i};
309 static _Complex float cpow_ui(_Complex float x, integer n) {
310 _Complex float pow=1.0; unsigned long int u;
312 if(n < 0) n = -n, x = 1/x;
323 static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
324 _Dcomplex pow={1.0,0.0}; unsigned long int u;
326 if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
328 if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
329 if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
333 _Dcomplex p = {pow._Val[0], pow._Val[1]};
337 static _Complex double zpow_ui(_Complex double x, integer n) {
338 _Complex double pow=1.0; unsigned long int u;
340 if(n < 0) n = -n, x = 1/x;
350 static integer pow_ii(integer x, integer n) {
351 integer pow; unsigned long int u;
353 if (n == 0 || x == 1) pow = 1;
354 else if (x != -1) pow = x == 0 ? 1/x : 0;
357 if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
367 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
369 double m; integer i, mi;
370 for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
371 if (w[i-1]>m) mi=i ,m=w[i-1];
374 static integer smaxloc_(float *w, integer s, integer e, integer *n)
376 float m; integer i, mi;
377 for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
378 if (w[i-1]>m) mi=i ,m=w[i-1];
381 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
382 integer n = *n_, incx = *incx_, incy = *incy_, i;
384 _Fcomplex zdotc = {0.0, 0.0};
385 if (incx == 1 && incy == 1) {
386 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
387 zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
388 zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
391 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
392 zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
393 zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
399 _Complex float zdotc = 0.0;
400 if (incx == 1 && incy == 1) {
401 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
402 zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
405 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
406 zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
412 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
413 integer n = *n_, incx = *incx_, incy = *incy_, i;
415 _Dcomplex zdotc = {0.0, 0.0};
416 if (incx == 1 && incy == 1) {
417 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
418 zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
419 zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
422 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
423 zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
424 zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
430 _Complex double zdotc = 0.0;
431 if (incx == 1 && incy == 1) {
432 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
433 zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
436 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
437 zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
443 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
444 integer n = *n_, incx = *incx_, incy = *incy_, i;
446 _Fcomplex zdotc = {0.0, 0.0};
447 if (incx == 1 && incy == 1) {
448 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
449 zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
450 zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
453 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
454 zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
455 zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
461 _Complex float zdotc = 0.0;
462 if (incx == 1 && incy == 1) {
463 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
464 zdotc += Cf(&x[i]) * Cf(&y[i]);
467 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
468 zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
474 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
475 integer n = *n_, incx = *incx_, incy = *incy_, i;
477 _Dcomplex zdotc = {0.0, 0.0};
478 if (incx == 1 && incy == 1) {
479 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
480 zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
481 zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
484 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
485 zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
486 zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
492 _Complex double zdotc = 0.0;
493 if (incx == 1 && incy == 1) {
494 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
495 zdotc += Cd(&x[i]) * Cd(&y[i]);
498 for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
499 zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
505 /* -- translated by f2c (version 20000121).
506 You must link the resulting object file with the libraries:
507 -lf2c -lm (in that order)
513 /* Table of constant values */
515 static doublecomplex c_b1 = {0.,0.};
516 static doublecomplex c_b2 = {1.,0.};
517 static integer c__1 = 1;
518 static integer c__0 = 0;
519 static doublereal c_b42 = 1.;
520 static integer c__2 = 2;
522 /* > \brief <b> ZGESVJ </b> */
524 /* =========== DOCUMENTATION =========== */
526 /* Online html documentation available at */
527 /* http://www.netlib.org/lapack/explore-html/ */
530 /* > Download ZGESVJ + dependencies */
531 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvj.
534 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvj.
537 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvj.
545 /* SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */
546 /* LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) */
548 /* INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N */
549 /* CHARACTER*1 JOBA, JOBU, JOBV */
550 /* COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK ) */
551 /* DOUBLE PRECISION RWORK( LRWORK ), SVA( N ) */
554 /* > \par Purpose: */
559 /* > ZGESVJ computes the singular value decomposition (SVD) of a complex */
560 /* > M-by-N matrix A, where M >= N. The SVD of A is written as */
561 /* > [++] [xx] [x0] [xx] */
562 /* > A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] */
564 /* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
565 /* > matrix, and V is an N-by-N unitary matrix. The diagonal elements */
566 /* > of SIGMA are the singular values of A. The columns of U and V are the */
567 /* > left and the right singular vectors of A, respectively. */
573 /* > \param[in] JOBA */
575 /* > JOBA is CHARACTER*1 */
576 /* > Specifies the structure of A. */
577 /* > = 'L': The input matrix A is lower triangular; */
578 /* > = 'U': The input matrix A is upper triangular; */
579 /* > = 'G': The input matrix A is general M-by-N matrix, M >= N. */
582 /* > \param[in] JOBU */
584 /* > JOBU is CHARACTER*1 */
585 /* > Specifies whether to compute the left singular vectors */
586 /* > (columns of U): */
587 /* > = 'U' or 'F': The left singular vectors corresponding to the nonzero */
588 /* > singular values are computed and returned in the leading */
589 /* > columns of A. See more details in the description of A. */
590 /* > The default numerical orthogonality threshold is set to */
591 /* > approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=DLAMCH('E'). */
592 /* > = 'C': Analogous to JOBU='U', except that user can control the */
593 /* > level of numerical orthogonality of the computed left */
594 /* > singular vectors. TOL can be set to TOL = CTOL*EPS, where */
595 /* > CTOL is given on input in the array WORK. */
596 /* > No CTOL smaller than ONE is allowed. CTOL greater */
597 /* > than 1 / EPS is meaningless. The option 'C' */
598 /* > can be used if M*EPS is satisfactory orthogonality */
599 /* > of the computed left singular vectors, so CTOL=M could */
600 /* > save few sweeps of Jacobi rotations. */
601 /* > See the descriptions of A and WORK(1). */
602 /* > = 'N': The matrix U is not computed. However, see the */
603 /* > description of A. */
606 /* > \param[in] JOBV */
608 /* > JOBV is CHARACTER*1 */
609 /* > Specifies whether to compute the right singular vectors, that */
610 /* > is, the matrix V: */
611 /* > = 'V' or 'J': the matrix V is computed and returned in the array V */
612 /* > = 'A': the Jacobi rotations are applied to the MV-by-N */
613 /* > array V. In other words, the right singular vector */
614 /* > matrix V is not computed explicitly; instead it is */
615 /* > applied to an MV-by-N matrix initially stored in the */
616 /* > first MV rows of V. */
617 /* > = 'N': the matrix V is not computed and the array V is not */
624 /* > The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. */
630 /* > The number of columns of the input matrix A. */
634 /* > \param[in,out] A */
636 /* > A is COMPLEX*16 array, dimension (LDA,N) */
637 /* > On entry, the M-by-N matrix A. */
639 /* > If JOBU = 'U' .OR. JOBU = 'C': */
640 /* > If INFO = 0 : */
641 /* > RANKA orthonormal columns of U are returned in the */
642 /* > leading RANKA columns of the array A. Here RANKA <= N */
643 /* > is the number of computed singular values of A that are */
644 /* > above the underflow threshold DLAMCH('S'). The singular */
645 /* > vectors corresponding to underflowed or zero singular */
646 /* > values are not computed. The value of RANKA is returned */
647 /* > in the array RWORK as RANKA=NINT(RWORK(2)). Also see the */
648 /* > descriptions of SVA and RWORK. The computed columns of U */
649 /* > are mutually numerically orthogonal up to approximately */
650 /* > TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */
651 /* > see the description of JOBU. */
653 /* > the procedure ZGESVJ did not converge in the given number */
654 /* > of iterations (sweeps). In that case, the computed */
655 /* > columns of U may not be orthogonal up to TOL. The output */
656 /* > U (stored in A), SIGMA (given by the computed singular */
657 /* > values in SVA(1:N)) and V is still a decomposition of the */
658 /* > input matrix A in the sense that the residual */
659 /* > || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small. */
660 /* > If JOBU = 'N': */
661 /* > If INFO = 0 : */
662 /* > Note that the left singular vectors are 'for free' in the */
663 /* > one-sided Jacobi SVD algorithm. However, if only the */
664 /* > singular values are needed, the level of numerical */
665 /* > orthogonality of U is not an issue and iterations are */
666 /* > stopped when the columns of the iterated matrix are */
667 /* > numerically orthogonal up to approximately M*EPS. Thus, */
668 /* > on exit, A contains the columns of U scaled with the */
669 /* > corresponding singular values. */
671 /* > the procedure ZGESVJ did not converge in the given number */
672 /* > of iterations (sweeps). */
675 /* > \param[in] LDA */
677 /* > LDA is INTEGER */
678 /* > The leading dimension of the array A. LDA >= f2cmax(1,M). */
681 /* > \param[out] SVA */
683 /* > SVA is DOUBLE PRECISION array, dimension (N) */
685 /* > If INFO = 0 : */
686 /* > depending on the value SCALE = RWORK(1), we have: */
687 /* > If SCALE = ONE: */
688 /* > SVA(1:N) contains the computed singular values of A. */
689 /* > During the computation SVA contains the Euclidean column */
690 /* > norms of the iterated matrices in the array A. */
691 /* > If SCALE .NE. ONE: */
692 /* > The singular values of A are SCALE*SVA(1:N), and this */
693 /* > factored representation is due to the fact that some of the */
694 /* > singular values of A might underflow or overflow. */
697 /* > the procedure ZGESVJ did not converge in the given number of */
698 /* > iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */
701 /* > \param[in] MV */
703 /* > MV is INTEGER */
704 /* > If JOBV = 'A', then the product of Jacobi rotations in ZGESVJ */
705 /* > is applied to the first MV rows of V. See the description of JOBV. */
708 /* > \param[in,out] V */
710 /* > V is COMPLEX*16 array, dimension (LDV,N) */
711 /* > If JOBV = 'V', then V contains on exit the N-by-N matrix of */
712 /* > the right singular vectors; */
713 /* > If JOBV = 'A', then V contains the product of the computed right */
714 /* > singular vector matrix and the initial matrix in */
716 /* > If JOBV = 'N', then V is not referenced. */
719 /* > \param[in] LDV */
721 /* > LDV is INTEGER */
722 /* > The leading dimension of the array V, LDV >= 1. */
723 /* > If JOBV = 'V', then LDV >= f2cmax(1,N). */
724 /* > If JOBV = 'A', then LDV >= f2cmax(1,MV) . */
727 /* > \param[in,out] CWORK */
729 /* > CWORK is COMPLEX*16 array, dimension (f2cmax(1,LWORK)) */
730 /* > Used as workspace. */
731 /* > If on entry LWORK = -1, then a workspace query is assumed and */
732 /* > no computation is done; CWORK(1) is set to the minial (and optimal) */
733 /* > length of CWORK. */
736 /* > \param[in] LWORK */
738 /* > LWORK is INTEGER. */
739 /* > Length of CWORK, LWORK >= M+N. */
742 /* > \param[in,out] RWORK */
744 /* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(6,LRWORK)) */
746 /* > If JOBU = 'C' : */
747 /* > RWORK(1) = CTOL, where CTOL defines the threshold for convergence. */
748 /* > The process stops if all columns of A are mutually */
749 /* > orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */
750 /* > It is required that CTOL >= ONE, i.e. it is not */
751 /* > allowed to force the routine to obtain orthogonality */
752 /* > below EPSILON. */
754 /* > RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */
755 /* > are the computed singular values of A. */
756 /* > (See description of SVA().) */
757 /* > RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero */
758 /* > singular values. */
759 /* > RWORK(3) = NINT(RWORK(3)) is the number of the computed singular */
760 /* > values that are larger than the underflow threshold. */
761 /* > RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi */
762 /* > rotations needed for numerical convergence. */
763 /* > RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */
764 /* > This is useful information in cases when ZGESVJ did */
765 /* > not converge, as it can be used to estimate whether */
766 /* > the output is still useful and for post festum analysis. */
767 /* > RWORK(6) = the largest absolute value over all sines of the */
768 /* > Jacobi rotation angles in the last sweep. It can be */
769 /* > useful for a post festum analysis. */
770 /* > If on entry LRWORK = -1, then a workspace query is assumed and */
771 /* > no computation is done; RWORK(1) is set to the minial (and optimal) */
772 /* > length of RWORK. */
775 /* > \param[in] LRWORK */
777 /* > LRWORK is INTEGER */
778 /* > Length of RWORK, LRWORK >= MAX(6,N). */
781 /* > \param[out] INFO */
783 /* > INFO is INTEGER */
784 /* > = 0: successful exit. */
785 /* > < 0: if INFO = -i, then the i-th argument had an illegal value */
786 /* > > 0: ZGESVJ did not converge in the maximal allowed number */
787 /* > (NSWEEP=30) of sweeps. The output may still be useful. */
788 /* > See the description of RWORK. */
794 /* > \author Univ. of Tennessee */
795 /* > \author Univ. of California Berkeley */
796 /* > \author Univ. of Colorado Denver */
797 /* > \author NAG Ltd. */
799 /* > \date June 2016 */
801 /* > \ingroup complex16GEcomputational */
803 /* > \par Further Details: */
804 /* ===================== */
808 /* > The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */
809 /* > rotations. In the case of underflow of the tangent of the Jacobi angle, a */
810 /* > modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses */
811 /* > column interchanges of de Rijk [1]. The relative accuracy of the computed */
812 /* > singular values and the accuracy of the computed singular vectors (in */
813 /* > angle metric) is as guaranteed by the theory of Demmel and Veselic [2]. */
814 /* > The condition number that determines the accuracy in the full rank case */
815 /* > is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */
816 /* > spectral condition number. The best performance of this Jacobi SVD */
817 /* > procedure is achieved if used in an accelerated version of Drmac and */
818 /* > Veselic [4,5], and it is the kernel routine in the SIGMA library [6]. */
819 /* > Some tunning parameters (marked with [TP]) are available for the */
821 /* > The computational range for the nonzero singular values is the machine */
822 /* > number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */
823 /* > denormalized singular values can be computed with the corresponding */
824 /* > gradual loss of accurate digits. */
827 /* > \par Contributor: */
828 /* ================== */
834 /* > Zlatko Drmac (Zagreb, Croatia) */
838 /* > \par References: */
839 /* ================ */
843 /* > [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */
844 /* > singular value decomposition on a vector computer. */
845 /* > SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. */
846 /* > [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */
847 /* > [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular */
848 /* > value computation in floating point arithmetic. */
849 /* > SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */
850 /* > [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */
851 /* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */
852 /* > LAPACK Working note 169. */
853 /* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */
854 /* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */
855 /* > LAPACK Working note 170. */
856 /* > [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
857 /* > QSVD, (H,K)-SVD computations. */
858 /* > Department of Mathematics, University of Zagreb, 2008, 2015. */
861 /* > \par Bugs, examples and comments: */
862 /* ================================= */
865 /* > =========================== */
866 /* > Please report all bugs and send interesting test examples and comments to */
867 /* > drmac@math.hr. Thank you. */
870 /* ===================================================================== */
871 /* Subroutine */ int zgesvj_(char *joba, char *jobu, char *jobv, integer *m,
872 integer *n, doublecomplex *a, integer *lda, doublereal *sva, integer *
873 mv, doublecomplex *v, integer *ldv, doublecomplex *cwork, integer *
874 lwork, doublereal *rwork, integer *lrwork, integer *info)
876 /* System generated locals */
877 integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5,
879 doublereal d__1, d__2;
880 doublecomplex z__1, z__2, z__3;
882 /* Local variables */
885 doublereal aaqq, ctol;
890 extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *,
891 doublecomplex *, integer *, doublereal *, doublecomplex *);
892 doublereal aapp0, aapq1, temp1;
894 doublereal t, apoaq, aqoap;
895 extern logical lsame_(char *, char *);
896 doublereal theta, small, sfmin;
899 logical applv, rsvec, uctol;
900 extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
901 doublecomplex *, integer *, doublecomplex *, integer *);
902 logical lower, upper, rotok;
904 extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
905 doublecomplex *, integer *), zswap_(integer *, doublecomplex *,
906 integer *, doublecomplex *, integer *), zaxpy_(integer *,
907 doublecomplex *, doublecomplex *, integer *, doublecomplex *,
909 doublereal rootsfmin;
910 extern doublereal dznrm2_(integer *, doublecomplex *, integer *);
911 extern /* Subroutine */ int zgsvj0_(char *, integer *, integer *,
912 doublecomplex *, integer *, doublecomplex *, doublereal *,
913 integer *, doublecomplex *, integer *, doublereal *, doublereal *,
914 doublereal *, integer *, doublecomplex *, integer *, integer *), zgsvj1_(char *, integer *, integer *, integer *,
915 doublecomplex *, integer *, doublecomplex *, doublereal *,
916 integer *, doublecomplex *, integer *, doublereal *, doublereal *,
917 doublereal *, integer *, doublecomplex *, integer *, integer *);
920 extern doublereal dlamch_(char *);
922 extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
923 doublereal *, doublereal *, integer *, integer *, doublereal *,
924 integer *, integer *);
925 extern integer idamax_(integer *, doublereal *, integer *);
926 extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
927 integer ijblsk, swband;
928 extern /* Subroutine */ int zdscal_(integer *, doublereal *,
929 doublecomplex *, integer *);
932 extern /* Subroutine */ int zlascl_(char *, integer *, integer *,
933 doublereal *, doublereal *, integer *, integer *, doublecomplex *,
934 integer *, integer *);
936 extern /* Subroutine */ int zlaset_(char *, integer *, integer *,
937 doublecomplex *, doublecomplex *, doublecomplex *, integer *);
940 extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *,
941 doublereal *, doublereal *);
944 integer notrot, iswrot, jbc;
946 integer kbl, lkahead, igl, ibr, jgl, nbl;
952 doublereal rootbig, rooteps;
957 /* -- LAPACK computational routine (version 3.8.0) -- */
958 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
959 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
963 /* ===================================================================== */
970 /* Test the input arguments */
972 /* Parameter adjustments */
975 a_offset = 1 + a_dim1 * 1;
978 v_offset = 1 + v_dim1 * 1;
984 lsvec = lsame_(jobu, "U") || lsame_(jobu, "F");
985 uctol = lsame_(jobu, "C");
986 rsvec = lsame_(jobv, "V") || lsame_(jobv, "J");
987 applv = lsame_(jobv, "A");
988 upper = lsame_(joba, "U");
989 lower = lsame_(joba, "L");
991 lquery = *lwork == -1 || *lrwork == -1;
992 if (! (upper || lower || lsame_(joba, "G"))) {
994 } else if (! (lsvec || uctol || lsame_(jobu, "N")))
997 } else if (! (rsvec || applv || lsame_(jobv, "N")))
1000 } else if (*m < 0) {
1002 } else if (*n < 0 || *n > *m) {
1004 } else if (*lda < *m) {
1006 } else if (*mv < 0) {
1008 } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
1010 } else if (uctol && rwork[1] <= 1.) {
1012 } else if (*lwork < *m + *n && ! lquery) {
1014 } else if (*lrwork < f2cmax(*n,6) && ! lquery) {
1023 xerbla_("ZGESVJ", &i__1, (ftnlen)6);
1025 } else if (lquery) {
1027 cwork[1].r = (doublereal) i__1, cwork[1].i = 0.;
1028 rwork[1] = (doublereal) f2cmax(*n,6);
1032 /* #:) Quick return for void matrix */
1034 if (*m == 0 || *n == 0) {
1038 /* Set numerical parameters */
1039 /* The stopping criterion for Jacobi rotations is */
1041 /* max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS */
1043 /* where EPS is the round-off and CTOL is defined as follows: */
1046 /* ... user controlled */
1050 if (lsvec || rsvec || applv) {
1051 ctol = sqrt((doublereal) (*m));
1053 ctol = (doublereal) (*m);
1056 /* ... and the machine dependent parameters are */
1057 /* [!] (Make sure that SLAMCH() works properly on the target machine.) */
1059 epsln = dlamch_("Epsilon");
1060 rooteps = sqrt(epsln);
1061 sfmin = dlamch_("SafeMinimum");
1062 rootsfmin = sqrt(sfmin);
1063 small = sfmin / epsln;
1064 big = dlamch_("Overflow");
1065 /* BIG = ONE / SFMIN */
1066 rootbig = 1. / rootsfmin;
1067 /* LARGE = BIG / SQRT( DBLE( M*N ) ) */
1068 bigtheta = 1. / rooteps;
1071 roottol = sqrt(tol);
1073 if ((doublereal) (*m) * epsln >= 1.) {
1076 xerbla_("ZGESVJ", &i__1, (ftnlen)6);
1080 /* Initialize the right singular vector matrix. */
1084 zlaset_("A", &mvl, n, &c_b1, &c_b2, &v[v_offset], ldv);
1088 rsvec = rsvec || applv;
1090 /* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */
1091 /* (!) If necessary, scale A to protect the largest singular value */
1092 /* from overflow. It is possible that saving the largest singular */
1093 /* value destroys the information about the small ones. */
1094 /* This initial scaling is almost minimal in the sense that the */
1095 /* goal is to make sure that no column norm overflows, and that */
1096 /* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */
1097 /* in A are detected, the procedure returns with INFO=-6. */
1099 skl = 1. / sqrt((doublereal) (*m) * (doublereal) (*n));
1104 /* the input matrix is M-by-N lower triangular (trapezoidal) */
1106 for (p = 1; p <= i__1; ++p) {
1110 zlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq);
1114 xerbla_("ZGESVJ", &i__2, (ftnlen)6);
1118 if (aapp < big / aaqq && noscale) {
1119 sva[p] = aapp * aaqq;
1122 sva[p] = aapp * (aaqq * skl);
1126 for (q = 1; q <= i__2; ++q) {
1135 /* the input matrix is M-by-N upper triangular (trapezoidal) */
1137 for (p = 1; p <= i__1; ++p) {
1140 zlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
1144 xerbla_("ZGESVJ", &i__2, (ftnlen)6);
1148 if (aapp < big / aaqq && noscale) {
1149 sva[p] = aapp * aaqq;
1152 sva[p] = aapp * (aaqq * skl);
1156 for (q = 1; q <= i__2; ++q) {
1165 /* the input matrix is M-by-N general dense */
1167 for (p = 1; p <= i__1; ++p) {
1170 zlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
1174 xerbla_("ZGESVJ", &i__2, (ftnlen)6);
1178 if (aapp < big / aaqq && noscale) {
1179 sva[p] = aapp * aaqq;
1182 sva[p] = aapp * (aaqq * skl);
1186 for (q = 1; q <= i__2; ++q) {
1200 /* Move the smaller part of the spectrum from the underflow threshold */
1201 /* (!) Start by determining the position of the nonzero entries of the */
1202 /* array SVA() relative to ( SFMIN, BIG ). */
1207 for (p = 1; p <= i__1; ++p) {
1210 d__1 = aaqq, d__2 = sva[p];
1211 aaqq = f2cmin(d__1,d__2);
1214 d__1 = aapp, d__2 = sva[p];
1215 aapp = f2cmax(d__1,d__2);
1219 /* #:) Quick return for zero matrix */
1223 zlaset_("G", m, n, &c_b1, &c_b2, &a[a_offset], lda);
1234 /* #:) Quick return for one-column matrix */
1238 zlascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1]
1241 rwork[1] = 1. / skl;
1242 if (sva[1] >= sfmin) {
1254 /* Protect small singular values from underflow, and try to */
1255 /* avoid underflows/overflows in computing Jacobi rotations. */
1257 sn = sqrt(sfmin / epsln);
1258 temp1 = sqrt(big / (doublereal) (*n));
1259 if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) {
1261 d__1 = big, d__2 = temp1 / aapp;
1262 temp1 = f2cmin(d__1,d__2);
1263 /* AAQQ = AAQQ*TEMP1 */
1264 /* AAPP = AAPP*TEMP1 */
1265 } else if (aaqq <= sn && aapp <= temp1) {
1267 d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n)));
1268 temp1 = f2cmin(d__1,d__2);
1269 /* AAQQ = AAQQ*TEMP1 */
1270 /* AAPP = AAPP*TEMP1 */
1271 } else if (aaqq >= sn && aapp >= temp1) {
1273 d__1 = sn / aaqq, d__2 = temp1 / aapp;
1274 temp1 = f2cmax(d__1,d__2);
1275 /* AAQQ = AAQQ*TEMP1 */
1276 /* AAPP = AAPP*TEMP1 */
1277 } else if (aaqq <= sn && aapp >= temp1) {
1279 d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp);
1280 temp1 = f2cmin(d__1,d__2);
1281 /* AAQQ = AAQQ*TEMP1 */
1282 /* AAPP = AAPP*TEMP1 */
1287 /* Scale, if necessary */
1290 dlascl_("G", &c__0, &c__0, &c_b42, &temp1, n, &c__1, &sva[1], n, &
1295 zlascl_(joba, &c__0, &c__0, &c_b42, &skl, m, n, &a[a_offset], lda, &
1300 /* Row-cyclic Jacobi SVD algorithm with column pivoting */
1302 emptsw = *n * (*n - 1) / 2;
1305 for (q = 1; q <= i__1; ++q) {
1307 cwork[i__2].r = 1., cwork[i__2].i = 0.;
1314 /* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */
1315 /* if ZGESVJ is used as a computational routine in the preconditioned */
1316 /* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure */
1317 /* works on pivots inside a band-like region around the diagonal. */
1318 /* The boundaries are determined dynamically, based on the number of */
1319 /* pivots above a threshold. */
1322 /* [TP] KBL is a tuning parameter that defines the tile size in the */
1323 /* tiling of the p-q loops of pivot pairs. In general, an optimal */
1324 /* value of KBL depends on the matrix dimensions and on the */
1325 /* parameters of the computer's memory. */
1328 if (nbl * kbl != *n) {
1332 /* Computing 2nd power */
1334 blskip = i__1 * i__1;
1335 /* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */
1337 rowskip = f2cmin(5,kbl);
1338 /* [TP] ROWSKIP is a tuning parameter. */
1341 /* [TP] LKAHEAD is a tuning parameter. */
1343 /* Quasi block transformations, using the lower (upper) triangular */
1344 /* structure of the input matrix. The quasi-block-cycling usually */
1345 /* invokes cubic convergence. Big part of this cycle is done inside */
1346 /* canonical subspaces of dimensions less than M. */
1349 i__1 = 64, i__2 = kbl << 2;
1350 if ((lower || upper) && *n > f2cmax(i__1,i__2)) {
1351 /* [TP] The number of partition levels and the actual partition are */
1352 /* tuning parameters. */
1364 /* This works very well on lower triangular matrices, in particular */
1365 /* in the framework of the preconditioned Jacobi SVD (xGEJSV). */
1366 /* The idea is simple: */
1367 /* [+ 0 0 0] Note that Jacobi transformations of [0 0] */
1368 /* [+ + 0 0] [0 0] */
1369 /* [+ + x 0] actually work on [x 0] [x 0] */
1370 /* [+ + x x] [x x]. [x x] */
1375 zgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda,
1376 &cwork[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + (
1377 n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &
1378 cwork[*n + 1], &i__3, &ierr);
1382 zgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, &
1383 cwork[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 +
1384 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &cwork[*n
1385 + 1], &i__3, &ierr);
1389 zgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1],
1390 lda, &cwork[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (
1391 n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &
1392 cwork[*n + 1], &i__3, &ierr);
1396 zgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, &
1397 cwork[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 +
1398 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n
1399 + 1], &i__3, &ierr);
1402 zgsvj0_(jobv, m, &n4, &a[a_offset], lda, &cwork[1], &sva[1], &mvl,
1403 &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*
1404 n + 1], &i__1, &ierr);
1407 zgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &cwork[1], &sva[1],
1408 &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
1409 cwork[*n + 1], &i__1, &ierr);
1416 zgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &cwork[1], &sva[1], &
1417 mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, &
1418 cwork[*n + 1], &i__1, &ierr);
1421 zgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &cwork[n4
1422 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) *
1423 v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n + 1],
1427 zgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &cwork[1], &sva[1]
1428 , &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
1429 cwork[*n + 1], &i__1, &ierr);
1433 zgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &cwork[
1434 n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) *
1435 v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n + 1],
1442 for (i__ = 1; i__ <= 30; ++i__) {
1452 /* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */
1453 /* 1 <= p < q <= N. This is the first step toward a blocked implementation */
1454 /* of the rotations. New implementation, based on block transformations, */
1455 /* is under development. */
1458 for (ibr = 1; ibr <= i__1; ++ibr) {
1460 igl = (ibr - 1) * kbl + 1;
1463 i__3 = lkahead, i__4 = nbl - ibr;
1464 i__2 = f2cmin(i__3,i__4);
1465 for (ir1 = 0; ir1 <= i__2; ++ir1) {
1470 i__4 = igl + kbl - 1, i__5 = *n - 1;
1471 i__3 = f2cmin(i__4,i__5);
1472 for (p = igl; p <= i__3; ++p) {
1476 q = idamax_(&i__4, &sva[p], &c__1) + p - 1;
1478 zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 +
1481 zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
1482 v_dim1 + 1], &c__1);
1488 aapq.r = cwork[i__4].r, aapq.i = cwork[i__4].i;
1491 cwork[i__4].r = cwork[i__5].r, cwork[i__4].i = cwork[
1494 cwork[i__4].r = aapq.r, cwork[i__4].i = aapq.i;
1499 /* Column norms are periodically updated by explicit */
1500 /* norm computation. */
1502 /* Unfortunately, some BLAS implementations compute DZNRM2(M,A(1,p),1) */
1503 /* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to */
1504 /* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */
1505 /* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */
1506 /* Hence, DZNRM2 cannot be trusted, not even in the case when */
1507 /* the true norm is far from the under(over)flow boundaries. */
1508 /* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF */
1509 /* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )". */
1511 if (sva[p] < rootbig && sva[p] > rootsfmin) {
1512 sva[p] = dznrm2_(m, &a[p * a_dim1 + 1], &c__1);
1516 zlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, &
1518 sva[p] = temp1 * sqrt(aapp);
1530 i__5 = igl + kbl - 1;
1531 i__4 = f2cmin(i__5,*n);
1532 for (q = p + 1; q <= i__4; ++q) {
1540 rotok = small * aapp <= aaqq;
1541 if (aapp < big / aaqq) {
1542 zdotc_(&z__3, m, &a[p * a_dim1 + 1], &
1543 c__1, &a[q * a_dim1 + 1], &
1545 z__2.r = z__3.r / aaqq, z__2.i =
1547 z__1.r = z__2.r / aapp, z__1.i =
1549 aapq.r = z__1.r, aapq.i = z__1.i;
1551 zcopy_(m, &a[p * a_dim1 + 1], &c__1, &
1552 cwork[*n + 1], &c__1);
1553 zlascl_("G", &c__0, &c__0, &aapp, &
1554 c_b42, m, &c__1, &cwork[*n +
1556 zdotc_(&z__2, m, &cwork[*n + 1], &
1557 c__1, &a[q * a_dim1 + 1], &
1559 z__1.r = z__2.r / aaqq, z__1.i =
1561 aapq.r = z__1.r, aapq.i = z__1.i;
1564 rotok = aapp <= aaqq / small;
1565 if (aapp > small / aaqq) {
1566 zdotc_(&z__3, m, &a[p * a_dim1 + 1], &
1567 c__1, &a[q * a_dim1 + 1], &
1569 z__2.r = z__3.r / aapp, z__2.i =
1571 z__1.r = z__2.r / aaqq, z__1.i =
1573 aapq.r = z__1.r, aapq.i = z__1.i;
1575 zcopy_(m, &a[q * a_dim1 + 1], &c__1, &
1576 cwork[*n + 1], &c__1);
1577 zlascl_("G", &c__0, &c__0, &aaqq, &
1578 c_b42, m, &c__1, &cwork[*n +
1580 zdotc_(&z__2, m, &a[p * a_dim1 + 1], &
1581 c__1, &cwork[*n + 1], &c__1);
1582 z__1.r = z__2.r / aapp, z__1.i =
1584 aapq.r = z__1.r, aapq.i = z__1.i;
1588 /* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) */
1589 aapq1 = -z_abs(&aapq);
1591 d__1 = mxaapq, d__2 = -aapq1;
1592 mxaapq = f2cmax(d__1,d__2);
1594 /* TO rotate or NOT to rotate, THAT is the question ... */
1596 if (abs(aapq1) > tol) {
1597 d__1 = z_abs(&aapq);
1598 z__1.r = aapq.r / d__1, z__1.i = aapq.i /
1600 ompq.r = z__1.r, ompq.i = z__1.i;
1602 /* [RTD] ROTATED = ROTATED + ONE */
1612 aqoap = aaqq / aapp;
1613 apoaq = aapp / aaqq;
1614 theta = (d__1 = aqoap - apoaq, abs(
1615 d__1)) * -.5 / aapq1;
1617 if (abs(theta) > bigtheta) {
1621 d_cnjg(&z__2, &ompq);
1622 z__1.r = t * z__2.r, z__1.i = t *
1624 zrot_(m, &a[p * a_dim1 + 1], &
1625 c__1, &a[q * a_dim1 + 1],
1628 d_cnjg(&z__2, &ompq);
1629 z__1.r = t * z__2.r, z__1.i = t * z__2.i;
1630 zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
1631 v_dim1 + 1], &c__1, &cs, &z__1);
1634 d__1 = 0., d__2 = t * apoaq *
1636 sva[q] = aaqq * sqrt((f2cmax(d__1,
1639 d__1 = 0., d__2 = 1. - t * aqoap *
1641 aapp *= sqrt((f2cmax(d__1,d__2)));
1643 d__1 = mxsinj, d__2 = abs(t);
1644 mxsinj = f2cmax(d__1,d__2);
1649 thsign = -d_sign(&c_b42, &aapq1);
1650 t = 1. / (theta + thsign * sqrt(
1651 theta * theta + 1.));
1652 cs = sqrt(1. / (t * t + 1.));
1656 d__1 = mxsinj, d__2 = abs(sn);
1657 mxsinj = f2cmax(d__1,d__2);
1659 d__1 = 0., d__2 = t * apoaq *
1661 sva[q] = aaqq * sqrt((f2cmax(d__1,
1664 d__1 = 0., d__2 = 1. - t * aqoap *
1666 aapp *= sqrt((f2cmax(d__1,d__2)));
1668 d_cnjg(&z__2, &ompq);
1669 z__1.r = sn * z__2.r, z__1.i = sn
1671 zrot_(m, &a[p * a_dim1 + 1], &
1672 c__1, &a[q * a_dim1 + 1],
1675 d_cnjg(&z__2, &ompq);
1676 z__1.r = sn * z__2.r, z__1.i = sn * z__2.i;
1677 zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
1678 v_dim1 + 1], &c__1, &cs, &z__1);
1683 z__2.r = -cwork[i__6].r, z__2.i =
1685 z__1.r = z__2.r * ompq.r - z__2.i *
1686 ompq.i, z__1.i = z__2.r *
1687 ompq.i + z__2.i * ompq.r;
1688 cwork[i__5].r = z__1.r, cwork[i__5].i
1692 zcopy_(m, &a[p * a_dim1 + 1], &c__1, &
1693 cwork[*n + 1], &c__1);
1694 zlascl_("G", &c__0, &c__0, &aapp, &
1695 c_b42, m, &c__1, &cwork[*n +
1697 zlascl_("G", &c__0, &c__0, &aaqq, &
1698 c_b42, m, &c__1, &a[q *
1699 a_dim1 + 1], lda, &ierr);
1700 z__1.r = -aapq.r, z__1.i = -aapq.i;
1701 zaxpy_(m, &z__1, &cwork[*n + 1], &
1702 c__1, &a[q * a_dim1 + 1], &
1704 zlascl_("G", &c__0, &c__0, &c_b42, &
1705 aaqq, m, &c__1, &a[q * a_dim1
1708 d__1 = 0., d__2 = 1. - aapq1 * aapq1;
1709 sva[q] = aaqq * sqrt((f2cmax(d__1,d__2)))
1711 mxsinj = f2cmax(mxsinj,sfmin);
1713 /* END IF ROTOK THEN ... ELSE */
1715 /* In the case of cancellation in updating SVA(q), SVA(p) */
1716 /* recompute SVA(q), SVA(p). */
1718 /* Computing 2nd power */
1719 d__1 = sva[q] / aaqq;
1720 if (d__1 * d__1 <= rooteps) {
1721 if (aaqq < rootbig && aaqq >
1723 sva[q] = dznrm2_(m, &a[q * a_dim1
1728 zlassq_(m, &a[q * a_dim1 + 1], &
1730 sva[q] = t * sqrt(aaqq);
1733 if (aapp / aapp0 <= rooteps) {
1734 if (aapp < rootbig && aapp >
1736 aapp = dznrm2_(m, &a[p * a_dim1 +
1741 zlassq_(m, &a[p * a_dim1 + 1], &
1743 aapp = t * sqrt(aapp);
1749 /* A(:,p) and A(:,q) already numerically orthogonal */
1753 /* [RTD] SKIPPED = SKIPPED + 1 */
1757 /* A(:,q) is zero column */
1764 if (i__ <= swband && pskipped > rowskip) {
1777 /* bailed out of q-loop */
1783 if (ir1 == 0 && aapp == 0.) {
1785 i__4 = igl + kbl - 1;
1786 notrot = notrot + f2cmin(i__4,*n) - p;
1792 /* end of the p-loop */
1793 /* end of doing the block ( ibr, ibr ) */
1796 /* end of ir1-loop */
1798 /* ... go to the off diagonal blocks */
1800 igl = (ibr - 1) * kbl + 1;
1803 for (jbc = ibr + 1; jbc <= i__2; ++jbc) {
1805 jgl = (jbc - 1) * kbl + 1;
1807 /* doing the block at ( ibr, jbc ) */
1811 i__4 = igl + kbl - 1;
1812 i__3 = f2cmin(i__4,*n);
1813 for (p = igl; p <= i__3; ++p) {
1821 i__5 = jgl + kbl - 1;
1822 i__4 = f2cmin(i__5,*n);
1823 for (q = jgl; q <= i__4; ++q) {
1830 /* Safe Gram matrix computation */
1834 rotok = small * aapp <= aaqq;
1836 rotok = small * aaqq <= aapp;
1838 if (aapp < big / aaqq) {
1839 zdotc_(&z__3, m, &a[p * a_dim1 + 1], &
1840 c__1, &a[q * a_dim1 + 1], &
1842 z__2.r = z__3.r / aaqq, z__2.i =
1844 z__1.r = z__2.r / aapp, z__1.i =
1846 aapq.r = z__1.r, aapq.i = z__1.i;
1848 zcopy_(m, &a[p * a_dim1 + 1], &c__1, &
1849 cwork[*n + 1], &c__1);
1850 zlascl_("G", &c__0, &c__0, &aapp, &
1851 c_b42, m, &c__1, &cwork[*n +
1853 zdotc_(&z__2, m, &cwork[*n + 1], &
1854 c__1, &a[q * a_dim1 + 1], &
1856 z__1.r = z__2.r / aaqq, z__1.i =
1858 aapq.r = z__1.r, aapq.i = z__1.i;
1862 rotok = aapp <= aaqq / small;
1864 rotok = aaqq <= aapp / small;
1866 if (aapp > small / aaqq) {
1867 zdotc_(&z__3, m, &a[p * a_dim1 + 1], &
1868 c__1, &a[q * a_dim1 + 1], &
1870 d__1 = f2cmax(aaqq,aapp);
1871 z__2.r = z__3.r / d__1, z__2.i =
1873 d__2 = f2cmin(aaqq,aapp);
1874 z__1.r = z__2.r / d__2, z__1.i =
1876 aapq.r = z__1.r, aapq.i = z__1.i;
1878 zcopy_(m, &a[q * a_dim1 + 1], &c__1, &
1879 cwork[*n + 1], &c__1);
1880 zlascl_("G", &c__0, &c__0, &aaqq, &
1881 c_b42, m, &c__1, &cwork[*n +
1883 zdotc_(&z__2, m, &a[p * a_dim1 + 1], &
1884 c__1, &cwork[*n + 1], &c__1);
1885 z__1.r = z__2.r / aapp, z__1.i =
1887 aapq.r = z__1.r, aapq.i = z__1.i;
1891 /* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */
1892 aapq1 = -z_abs(&aapq);
1894 d__1 = mxaapq, d__2 = -aapq1;
1895 mxaapq = f2cmax(d__1,d__2);
1897 /* TO rotate or NOT to rotate, THAT is the question ... */
1899 if (abs(aapq1) > tol) {
1900 d__1 = z_abs(&aapq);
1901 z__1.r = aapq.r / d__1, z__1.i = aapq.i /
1903 ompq.r = z__1.r, ompq.i = z__1.i;
1905 /* [RTD] ROTATED = ROTATED + 1 */
1911 aqoap = aaqq / aapp;
1912 apoaq = aapp / aaqq;
1913 theta = (d__1 = aqoap - apoaq, abs(
1914 d__1)) * -.5 / aapq1;
1919 if (abs(theta) > bigtheta) {
1922 d_cnjg(&z__2, &ompq);
1923 z__1.r = t * z__2.r, z__1.i = t *
1925 zrot_(m, &a[p * a_dim1 + 1], &
1926 c__1, &a[q * a_dim1 + 1],
1929 d_cnjg(&z__2, &ompq);
1930 z__1.r = t * z__2.r, z__1.i = t * z__2.i;
1931 zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
1932 v_dim1 + 1], &c__1, &cs, &z__1);
1935 d__1 = 0., d__2 = t * apoaq *
1937 sva[q] = aaqq * sqrt((f2cmax(d__1,
1940 d__1 = 0., d__2 = 1. - t * aqoap *
1942 aapp *= sqrt((f2cmax(d__1,d__2)));
1944 d__1 = mxsinj, d__2 = abs(t);
1945 mxsinj = f2cmax(d__1,d__2);
1949 thsign = -d_sign(&c_b42, &aapq1);
1953 t = 1. / (theta + thsign * sqrt(
1954 theta * theta + 1.));
1955 cs = sqrt(1. / (t * t + 1.));
1958 d__1 = mxsinj, d__2 = abs(sn);
1959 mxsinj = f2cmax(d__1,d__2);
1961 d__1 = 0., d__2 = t * apoaq *
1963 sva[q] = aaqq * sqrt((f2cmax(d__1,
1966 d__1 = 0., d__2 = 1. - t * aqoap *
1968 aapp *= sqrt((f2cmax(d__1,d__2)));
1970 d_cnjg(&z__2, &ompq);
1971 z__1.r = sn * z__2.r, z__1.i = sn
1973 zrot_(m, &a[p * a_dim1 + 1], &
1974 c__1, &a[q * a_dim1 + 1],
1977 d_cnjg(&z__2, &ompq);
1978 z__1.r = sn * z__2.r, z__1.i = sn * z__2.i;
1979 zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
1980 v_dim1 + 1], &c__1, &cs, &z__1);
1985 z__2.r = -cwork[i__6].r, z__2.i =
1987 z__1.r = z__2.r * ompq.r - z__2.i *
1988 ompq.i, z__1.i = z__2.r *
1989 ompq.i + z__2.i * ompq.r;
1990 cwork[i__5].r = z__1.r, cwork[i__5].i
1995 zcopy_(m, &a[p * a_dim1 + 1], &
1996 c__1, &cwork[*n + 1], &
1998 zlascl_("G", &c__0, &c__0, &aapp,
1999 &c_b42, m, &c__1, &cwork[*
2000 n + 1], lda, &ierr);
2001 zlascl_("G", &c__0, &c__0, &aaqq,
2002 &c_b42, m, &c__1, &a[q *
2003 a_dim1 + 1], lda, &ierr);
2004 z__1.r = -aapq.r, z__1.i =
2006 zaxpy_(m, &z__1, &cwork[*n + 1], &
2007 c__1, &a[q * a_dim1 + 1],
2009 zlascl_("G", &c__0, &c__0, &c_b42,
2010 &aaqq, m, &c__1, &a[q *
2011 a_dim1 + 1], lda, &ierr);
2013 d__1 = 0., d__2 = 1. - aapq1 *
2015 sva[q] = aaqq * sqrt((f2cmax(d__1,
2017 mxsinj = f2cmax(mxsinj,sfmin);
2019 zcopy_(m, &a[q * a_dim1 + 1], &
2020 c__1, &cwork[*n + 1], &
2022 zlascl_("G", &c__0, &c__0, &aaqq,
2023 &c_b42, m, &c__1, &cwork[*
2024 n + 1], lda, &ierr);
2025 zlascl_("G", &c__0, &c__0, &aapp,
2026 &c_b42, m, &c__1, &a[p *
2027 a_dim1 + 1], lda, &ierr);
2028 d_cnjg(&z__2, &aapq);
2029 z__1.r = -z__2.r, z__1.i =
2031 zaxpy_(m, &z__1, &cwork[*n + 1], &
2032 c__1, &a[p * a_dim1 + 1],
2034 zlascl_("G", &c__0, &c__0, &c_b42,
2035 &aapp, m, &c__1, &a[p *
2036 a_dim1 + 1], lda, &ierr);
2038 d__1 = 0., d__2 = 1. - aapq1 *
2040 sva[p] = aapp * sqrt((f2cmax(d__1,
2042 mxsinj = f2cmax(mxsinj,sfmin);
2045 /* END IF ROTOK THEN ... ELSE */
2047 /* In the case of cancellation in updating SVA(q), SVA(p) */
2048 /* Computing 2nd power */
2049 d__1 = sva[q] / aaqq;
2050 if (d__1 * d__1 <= rooteps) {
2051 if (aaqq < rootbig && aaqq >
2053 sva[q] = dznrm2_(m, &a[q * a_dim1
2058 zlassq_(m, &a[q * a_dim1 + 1], &
2060 sva[q] = t * sqrt(aaqq);
2063 /* Computing 2nd power */
2064 d__1 = aapp / aapp0;
2065 if (d__1 * d__1 <= rooteps) {
2066 if (aapp < rootbig && aapp >
2068 aapp = dznrm2_(m, &a[p * a_dim1 +
2073 zlassq_(m, &a[p * a_dim1 + 1], &
2075 aapp = t * sqrt(aapp);
2079 /* end of OK rotation */
2082 /* [RTD] SKIPPED = SKIPPED + 1 */
2092 if (i__ <= swband && ijblsk >= blskip) {
2097 if (i__ <= swband && pskipped > rowskip) {
2105 /* end of the q-loop */
2114 i__4 = jgl + kbl - 1;
2115 notrot = notrot + f2cmin(i__4,*n) - jgl + 1;
2125 /* end of the p-loop */
2128 /* end of the jbc-loop */
2130 /* 2011 bailed out of the jbc-loop */
2132 i__3 = igl + kbl - 1;
2133 i__2 = f2cmin(i__3,*n);
2134 for (p = igl; p <= i__2; ++p) {
2135 sva[p] = (d__1 = sva[p], abs(d__1));
2141 /* 2000 :: end of the ibr-loop */
2143 if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
2144 sva[*n] = dznrm2_(m, &a[*n * a_dim1 + 1], &c__1);
2148 zlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
2149 sva[*n] = t * sqrt(aapp);
2152 /* Additional steering devices */
2154 if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
2158 if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && (
2159 doublereal) (*n) * mxaapq * mxsinj < tol) {
2163 if (notrot >= emptsw) {
2169 /* end i=1:NSWEEP loop */
2171 /* #:( Reaching this point means that the procedure has not converged. */
2176 /* #:) Reaching this point means numerical convergence after the i-th */
2180 /* #:) INFO = 0 confirms successful iterations. */
2183 /* Sort the singular values and find how many are above */
2184 /* the underflow threshold. */
2189 for (p = 1; p <= i__1; ++p) {
2191 q = idamax_(&i__2, &sva[p], &c__1) + p - 1;
2196 zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
2198 zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
2204 if (sva[p] * skl > sfmin) {
2210 if (sva[*n] != 0.) {
2212 if (sva[*n] * skl > sfmin) {
2217 /* Normalize the left singular vectors. */
2219 if (lsvec || uctol) {
2221 for (p = 1; p <= i__1; ++p) {
2222 /* CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 ) */
2223 zlascl_("G", &c__0, &c__0, &sva[p], &c_b42, m, &c__1, &a[p *
2224 a_dim1 + 1], m, &ierr);
2229 /* Scale the product of Jacobi rotations. */
2233 for (p = 1; p <= i__1; ++p) {
2234 temp1 = 1. / dznrm2_(&mvl, &v[p * v_dim1 + 1], &c__1);
2235 zdscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1);
2240 /* Undo scaling, if necessary (and possible). */
2241 if (skl > 1. && sva[1] < big / skl || skl < 1. && sva[f2cmax(n2,1)] > sfmin /
2244 for (p = 1; p <= i__1; ++p) {
2245 sva[p] = skl * sva[p];
2252 /* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */
2253 /* then some of the singular values may overflow or underflow and */
2254 /* the spectrum is given in this factored representation. */
2256 rwork[2] = (doublereal) n4;
2257 /* N4 is the number of computed nonzero singular values of A. */
2259 rwork[3] = (doublereal) n2;
2260 /* N2 is the number of singular values of A greater than SFMIN. */
2261 /* If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers */
2262 /* that may carry some information. */
2264 rwork[4] = (doublereal) i__;
2265 /* i is the index of the last sweep before declaring convergence. */
2268 /* MXAAPQ is the largest absolute value of scaled pivots in the */
2272 /* MXSINJ is the largest absolute value of the sines of Jacobi angles */
2273 /* in the last sweep */