1 *> \brief \b SGSVJ1 pre-processor for the routine sgesvj, applies Jacobi rotations targeting only particular pivots.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download SGSVJ1 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj1.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj1.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj1.f">
21 * SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV,
22 * EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
24 * .. Scalar Arguments ..
25 * REAL EPS, SFMIN, TOL
26 * INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP
29 * .. Array Arguments ..
30 * REAL A( LDA, * ), D( N ), SVA( N ), V( LDV, * ),
40 *> SGSVJ1 is called from SGESVJ as a pre-processor and that is its main
41 *> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
42 *> it targets only particular pivots and it does not check convergence
43 *> (stopping criterion). Few tunning parameters (marked by [TP]) are
44 *> available for the implementer.
48 *> SGSVJ1 applies few sweeps of Jacobi rotations in the column space of
49 *> the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
50 *> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
51 *> block-entries (tiles) of the (1,2) off-diagonal block are marked by the
52 *> [x]'s in the following scheme:
54 *> | * * * [x] [x] [x]|
55 *> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks.
56 *> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block.
57 *> |[x] [x] [x] * * * |
58 *> |[x] [x] [x] * * * |
59 *> |[x] [x] [x] * * * |
61 *> In terms of the columns of A, the first N1 columns are rotated 'against'
62 *> the remaining N-N1 columns, trying to increase the angle between the
63 *> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
64 *> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter.
65 *> The number of sweeps is given in NSWEEP and the orthogonality threshold
74 *> JOBV is CHARACTER*1
75 *> Specifies whether the output from this procedure is used
76 *> to compute the matrix V:
77 *> = 'V': the product of the Jacobi rotations is accumulated
78 *> by postmulyiplying the N-by-N array V.
79 *> (See the description of V.)
80 *> = 'A': the product of the Jacobi rotations is accumulated
81 *> by postmulyiplying the MV-by-N array V.
82 *> (See the descriptions of MV and V.)
83 *> = 'N': the Jacobi rotations are not accumulated.
89 *> The number of rows of the input matrix A. M >= 0.
95 *> The number of columns of the input matrix A.
102 *> N1 specifies the 2 x 2 block partition, the first N1 columns are
103 *> rotated 'against' the remaining N-N1 columns of A.
108 *> A is REAL array, dimension (LDA,N)
109 *> On entry, M-by-N matrix A, such that A*diag(D) represents
112 *> A_onexit * D_onexit represents the input matrix A*diag(D)
113 *> post-multiplied by a sequence of Jacobi rotations, where the
114 *> rotation threshold and the total number of sweeps are given in
115 *> TOL and NSWEEP, respectively.
116 *> (See the descriptions of N1, D, TOL and NSWEEP.)
122 *> The leading dimension of the array A. LDA >= max(1,M).
127 *> D is REAL array, dimension (N)
128 *> The array D accumulates the scaling factors from the fast scaled
130 *> On entry, A*diag(D) represents the input matrix.
131 *> On exit, A_onexit*diag(D_onexit) represents the input matrix
132 *> post-multiplied by a sequence of Jacobi rotations, where the
133 *> rotation threshold and the total number of sweeps are given in
134 *> TOL and NSWEEP, respectively.
135 *> (See the descriptions of N1, A, TOL and NSWEEP.)
138 *> \param[in,out] SVA
140 *> SVA is REAL array, dimension (N)
141 *> On entry, SVA contains the Euclidean norms of the columns of
142 *> the matrix A*diag(D).
143 *> On exit, SVA contains the Euclidean norms of the columns of
144 *> the matrix onexit*diag(D_onexit).
150 *> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a
151 *> sequence of Jacobi rotations.
152 *> If JOBV = 'N', then MV is not referenced.
157 *> V is REAL array, dimension (LDV,N)
158 *> If JOBV .EQ. 'V' then N rows of V are post-multipled by a
159 *> sequence of Jacobi rotations.
160 *> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a
161 *> sequence of Jacobi rotations.
162 *> If JOBV = 'N', then V is not referenced.
168 *> The leading dimension of the array V, LDV >= 1.
169 *> If JOBV = 'V', LDV .GE. N.
170 *> If JOBV = 'A', LDV .GE. MV.
176 *> EPS = SLAMCH('Epsilon')
182 *> SFMIN = SLAMCH('Safe Minimum')
188 *> TOL is the threshold for Jacobi rotations. For a pair
189 *> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
190 *> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL.
196 *> NSWEEP is the number of sweeps of Jacobi rotations to be
202 *> WORK is REAL array, dimension LWORK.
208 *> LWORK is the dimension of WORK. LWORK .GE. M.
214 *> = 0 : successful exit.
215 *> < 0 : if INFO = -i, then the i-th argument had an illegal value
221 *> \author Univ. of Tennessee
222 *> \author Univ. of California Berkeley
223 *> \author Univ. of Colorado Denver
226 *> \date November 2015
228 *> \ingroup realOTHERcomputational
230 *> \par Contributors:
233 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
235 * =====================================================================
236 SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV,
237 $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
239 * -- LAPACK computational routine (version 3.6.0) --
240 * -- LAPACK is a software package provided by Univ. of Tennessee, --
241 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
244 * .. Scalar Arguments ..
246 INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP
249 * .. Array Arguments ..
250 REAL A( LDA, * ), D( N ), SVA( N ), V( LDV, * ),
254 * =====================================================================
256 * .. Local Parameters ..
258 PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0)
260 * .. Local Scalars ..
261 REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG,
262 $ BIGTHETA, CS, LARGE, MXAAPQ, MXSINJ, ROOTBIG,
263 $ ROOTEPS, ROOTSFMIN, ROOTTOL, SMALL, SN, T,
264 $ TEMP1, THETA, THSIGN
265 INTEGER BLSKIP, EMPTSW, i, ibr, igl, IERR, IJBLSK,
266 $ ISWROT, jbc, jgl, KBL, MVL, NOTROT, nblc, nblr,
267 $ p, PSKIPPED, q, ROWSKIP, SWBAND
268 LOGICAL APPLV, ROTOK, RSVEC
273 * .. Intrinsic Functions ..
274 INTRINSIC ABS, MAX, FLOAT, MIN, SIGN, SQRT
276 * .. External Functions ..
280 EXTERNAL ISAMAX, LSAME, SDOT, SNRM2
282 * .. External Subroutines ..
283 EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP
285 * .. Executable Statements ..
287 * Test the input parameters.
289 APPLV = LSAME( JOBV, 'A' )
290 RSVEC = LSAME( JOBV, 'V' )
291 IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN
293 ELSE IF( M.LT.0 ) THEN
295 ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN
297 ELSE IF( N1.LT.0 ) THEN
299 ELSE IF( LDA.LT.M ) THEN
301 ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN
303 ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR.
304 $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN
306 ELSE IF( TOL.LE.EPS ) THEN
308 ELSE IF( NSWEEP.LT.0 ) THEN
310 ELSE IF( LWORK.LT.M ) THEN
318 CALL XERBLA( 'SGSVJ1', -INFO )
324 ELSE IF( APPLV ) THEN
327 RSVEC = RSVEC .OR. APPLV
329 ROOTEPS = SQRT( EPS )
330 ROOTSFMIN = SQRT( SFMIN )
333 ROOTBIG = ONE / ROOTSFMIN
334 LARGE = BIG / SQRT( FLOAT( M*N ) )
335 BIGTHETA = ONE / ROOTEPS
336 ROOTTOL = SQRT( TOL )
338 * .. Initialize the right singular vector matrix ..
340 * RSVEC = LSAME( JOBV, 'Y' )
346 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
350 IF( ( NBLR*KBL ).NE.N1 )NBLR = NBLR + 1
352 * .. the tiling is nblr-by-nblc [tiles]
354 NBLC = ( N-N1 ) / KBL
355 IF( ( NBLC*KBL ).NE.( N-N1 ) )NBLC = NBLC + 1
356 BLSKIP = ( KBL**2 ) + 1
357 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
359 ROWSKIP = MIN( 5, KBL )
360 *[TP] ROWSKIP is a tuning parameter.
362 *[TP] SWBAND is a tuning parameter. It is meaningful and effective
363 * if SGESVJ is used as a computational routine in the preconditioned
364 * Jacobi SVD algorithm SGESVJ.
367 * | * * * [x] [x] [x]|
368 * | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks.
369 * | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block.
370 * |[x] [x] [x] * * * |
371 * |[x] [x] [x] * * * |
372 * |[x] [x] [x] * * * |
375 DO 1993 i = 1, NSWEEP
385 DO 2000 ibr = 1, NBLR
387 igl = ( ibr-1 )*KBL + 1
390 *........................................................
391 * ... go to the off diagonal blocks
393 igl = ( ibr-1 )*KBL + 1
395 DO 2010 jbc = 1, NBLC
397 jgl = N1 + ( jbc-1 )*KBL + 1
399 * doing the block at ( ibr, jbc )
402 DO 2100 p = igl, MIN( igl+KBL-1, N1 )
406 IF( AAPP.GT.ZERO ) THEN
410 DO 2200 q = jgl, MIN( jgl+KBL-1, N )
414 IF( AAQQ.GT.ZERO ) THEN
417 * .. M x 2 Jacobi SVD ..
419 * .. Safe Gram matrix computation ..
421 IF( AAQQ.GE.ONE ) THEN
422 IF( AAPP.GE.AAQQ ) THEN
423 ROTOK = ( SMALL*AAPP ).LE.AAQQ
425 ROTOK = ( SMALL*AAQQ ).LE.AAPP
427 IF( AAPP.LT.( BIG / AAQQ ) ) THEN
428 AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1,
429 $ q ), 1 )*D( p )*D( q ) / AAQQ )
432 CALL SCOPY( M, A( 1, p ), 1, WORK, 1 )
433 CALL SLASCL( 'G', 0, 0, AAPP, D( p ),
434 $ M, 1, WORK, LDA, IERR )
435 AAPQ = SDOT( M, WORK, 1, A( 1, q ),
439 IF( AAPP.GE.AAQQ ) THEN
440 ROTOK = AAPP.LE.( AAQQ / SMALL )
442 ROTOK = AAQQ.LE.( AAPP / SMALL )
444 IF( AAPP.GT.( SMALL / AAQQ ) ) THEN
445 AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1,
446 $ q ), 1 )*D( p )*D( q ) / AAQQ )
449 CALL SCOPY( M, A( 1, q ), 1, WORK, 1 )
450 CALL SLASCL( 'G', 0, 0, AAQQ, D( q ),
451 $ M, 1, WORK, LDA, IERR )
452 AAPQ = SDOT( M, WORK, 1, A( 1, p ),
457 MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) )
459 * TO rotate or NOT to rotate, THAT is the question ...
461 IF( ABS( AAPQ ).GT.TOL ) THEN
463 * ROTATED = ROTATED + 1
471 THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ
472 IF( AAQQ.GT.AAPP0 )THETA = -THETA
474 IF( ABS( THETA ).GT.BIGTHETA ) THEN
476 FASTR( 3 ) = T*D( p ) / D( q )
477 FASTR( 4 ) = -T*D( q ) / D( p )
478 CALL SROTM( M, A( 1, p ), 1,
479 $ A( 1, q ), 1, FASTR )
480 IF( RSVEC )CALL SROTM( MVL,
484 SVA( q ) = AAQQ*SQRT( MAX( ZERO,
485 $ ONE+T*APOAQ*AAPQ ) )
486 AAPP = AAPP*SQRT( MAX( ZERO,
487 $ ONE-T*AQOAP*AAPQ ) )
488 MXSINJ = MAX( MXSINJ, ABS( T ) )
491 * .. choose correct signum for THETA and rotate
493 THSIGN = -SIGN( ONE, AAPQ )
494 IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN
495 T = ONE / ( THETA+THSIGN*
496 $ SQRT( ONE+THETA*THETA ) )
497 CS = SQRT( ONE / ( ONE+T*T ) )
499 MXSINJ = MAX( MXSINJ, ABS( SN ) )
500 SVA( q ) = AAQQ*SQRT( MAX( ZERO,
501 $ ONE+T*APOAQ*AAPQ ) )
502 AAPP = AAPP*SQRT( MAX( ZERO,
503 $ ONE-T*AQOAP*AAPQ ) )
505 APOAQ = D( p ) / D( q )
506 AQOAP = D( q ) / D( p )
507 IF( D( p ).GE.ONE ) THEN
509 IF( D( q ).GE.ONE ) THEN
511 FASTR( 4 ) = -T*AQOAP
514 CALL SROTM( M, A( 1, p ), 1,
517 IF( RSVEC )CALL SROTM( MVL,
518 $ V( 1, p ), 1, V( 1, q ),
521 CALL SAXPY( M, -T*AQOAP,
524 CALL SAXPY( M, CS*SN*APOAQ,
528 CALL SAXPY( MVL, -T*AQOAP,
540 IF( D( q ).GE.ONE ) THEN
541 CALL SAXPY( M, T*APOAQ,
544 CALL SAXPY( M, -CS*SN*AQOAP,
548 CALL SAXPY( MVL, T*APOAQ,
559 IF( D( p ).GE.D( q ) ) THEN
560 CALL SAXPY( M, -T*AQOAP,
563 CALL SAXPY( M, CS*SN*APOAQ,
579 CALL SAXPY( M, T*APOAQ,
590 $ T*APOAQ, V( 1, p ),
603 IF( AAPP.GT.AAQQ ) THEN
604 CALL SCOPY( M, A( 1, p ), 1, WORK,
606 CALL SLASCL( 'G', 0, 0, AAPP, ONE,
607 $ M, 1, WORK, LDA, IERR )
608 CALL SLASCL( 'G', 0, 0, AAQQ, ONE,
609 $ M, 1, A( 1, q ), LDA,
611 TEMP1 = -AAPQ*D( p ) / D( q )
612 CALL SAXPY( M, TEMP1, WORK, 1,
614 CALL SLASCL( 'G', 0, 0, ONE, AAQQ,
615 $ M, 1, A( 1, q ), LDA,
617 SVA( q ) = AAQQ*SQRT( MAX( ZERO,
619 MXSINJ = MAX( MXSINJ, SFMIN )
621 CALL SCOPY( M, A( 1, q ), 1, WORK,
623 CALL SLASCL( 'G', 0, 0, AAQQ, ONE,
624 $ M, 1, WORK, LDA, IERR )
625 CALL SLASCL( 'G', 0, 0, AAPP, ONE,
626 $ M, 1, A( 1, p ), LDA,
628 TEMP1 = -AAPQ*D( q ) / D( p )
629 CALL SAXPY( M, TEMP1, WORK, 1,
631 CALL SLASCL( 'G', 0, 0, ONE, AAPP,
632 $ M, 1, A( 1, p ), LDA,
634 SVA( p ) = AAPP*SQRT( MAX( ZERO,
636 MXSINJ = MAX( MXSINJ, SFMIN )
639 * END IF ROTOK THEN ... ELSE
641 * In the case of cancellation in updating SVA(q)
642 * .. recompute SVA(q)
643 IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS )
645 IF( ( AAQQ.LT.ROOTBIG ) .AND.
646 $ ( AAQQ.GT.ROOTSFMIN ) ) THEN
647 SVA( q ) = SNRM2( M, A( 1, q ), 1 )*
652 CALL SLASSQ( M, A( 1, q ), 1, T,
654 SVA( q ) = T*SQRT( AAQQ )*D( q )
657 IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN
658 IF( ( AAPP.LT.ROOTBIG ) .AND.
659 $ ( AAPP.GT.ROOTSFMIN ) ) THEN
660 AAPP = SNRM2( M, A( 1, p ), 1 )*
665 CALL SLASSQ( M, A( 1, p ), 1, T,
667 AAPP = T*SQRT( AAPP )*D( p )
674 * SKIPPED = SKIPPED + 1
675 PSKIPPED = PSKIPPED + 1
680 PSKIPPED = PSKIPPED + 1
684 * IF ( NOTROT .GE. EMPTSW ) GO TO 2011
685 IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) )
691 IF( ( i.LE.SWBAND ) .AND.
692 $ ( PSKIPPED.GT.ROWSKIP ) ) THEN
706 IF( AAPP.EQ.ZERO )NOTROT = NOTROT +
707 $ MIN( jgl+KBL-1, N ) - jgl + 1
708 IF( AAPP.LT.ZERO )NOTROT = 0
709 *** IF ( NOTROT .GE. EMPTSW ) GO TO 2011
715 * end of the jbc-loop
717 *2011 bailed out of the jbc-loop
718 DO 2012 p = igl, MIN( igl+KBL-1, N )
719 SVA( p ) = ABS( SVA( p ) )
721 *** IF ( NOTROT .GE. EMPTSW ) GO TO 1994
723 *2000 :: end of the ibr-loop
726 IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) )
728 SVA( N ) = SNRM2( M, A( 1, N ), 1 )*D( N )
732 CALL SLASSQ( M, A( 1, N ), 1, T, AAPP )
733 SVA( N ) = T*SQRT( AAPP )*D( N )
736 * Additional steering devices
738 IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR.
739 $ ( ISWROT.LE.N ) ) )SWBAND = i
741 IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.FLOAT( N )*TOL ) .AND.
742 $ ( FLOAT( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN
747 IF( NOTROT.GE.EMPTSW )GO TO 1994
750 * end i=1:NSWEEP loop
751 * #:) Reaching this point means that the procedure has completed the given
756 * #:) Reaching this point means that during the i-th sweep all pivots were
757 * below the given threshold, causing early exit.
760 * #:) INFO = 0 confirms successful iterations.
766 q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1
774 CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 )
775 IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 )