1 *> \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download DLASQ2 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq2.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq2.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq2.f">
21 * SUBROUTINE DLASQ2( N, Z, INFO )
23 * .. Scalar Arguments ..
26 * .. Array Arguments ..
27 * DOUBLE PRECISION Z( * )
36 *> DLASQ2 computes all the eigenvalues of the symmetric positive
37 *> definite tridiagonal matrix associated with the qd array Z to high
38 *> relative accuracy are computed to high relative accuracy, in the
39 *> absence of denormalization, underflow and overflow.
41 *> To see the relation of Z to the tridiagonal matrix, let L be a
42 *> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
43 *> let U be an upper bidiagonal matrix with 1's above and diagonal
44 *> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
45 *> symmetric tridiagonal to which it is similar.
47 *> Note : DLASQ2 defines a logical variable, IEEE, which is true
48 *> on machines which follow ieee-754 floating-point standard in their
49 *> handling of infinities and NaNs, and false otherwise. This variable
50 *> is passed to DLASQ3.
59 *> The number of rows and columns in the matrix. N >= 0.
64 *> Z is DOUBLE PRECISION array, dimension ( 4*N )
65 *> On entry Z holds the qd array. On exit, entries 1 to N hold
66 *> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
67 *> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
68 *> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
69 *> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
70 *> shifts that failed.
76 *> = 0: successful exit
77 *> < 0: if the i-th argument is a scalar and had an illegal
78 *> value, then INFO = -i, if the i-th argument is an
79 *> array and the j-entry had an illegal value, then
81 *> > 0: the algorithm failed
82 *> = 1, a split was marked by a positive value in E
83 *> = 2, current block of Z not diagonalized after 100*N
84 *> iterations (in inner while loop). On exit Z holds
85 *> a qd array with the same eigenvalues as the given Z.
86 *> = 3, termination criterion of outer while loop not met
87 *> (program created more than N unreduced blocks)
93 *> \author Univ. of Tennessee
94 *> \author Univ. of California Berkeley
95 *> \author Univ. of Colorado Denver
98 *> \date September 2012
100 *> \ingroup auxOTHERcomputational
102 *> \par Further Details:
103 * =====================
107 *> Local Variables: I0:N0 defines a current unreduced segment of Z.
108 *> The shifts are accumulated in SIGMA. Iteration count is in ITER.
109 *> Ping-pong is controlled by PP (alternates between 0 and 1).
112 * =====================================================================
113 SUBROUTINE DLASQ2( N, Z, INFO )
115 * -- LAPACK computational routine (version 3.4.2) --
116 * -- LAPACK is a software package provided by Univ. of Tennessee, --
117 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
120 * .. Scalar Arguments ..
123 * .. Array Arguments ..
124 DOUBLE PRECISION Z( * )
127 * =====================================================================
130 DOUBLE PRECISION CBIAS
131 PARAMETER ( CBIAS = 1.50D0 )
132 DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD
133 PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0,
134 $ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 )
136 * .. Local Scalars ..
138 INTEGER I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB,
139 $ K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT,
141 DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN,
142 $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX,
143 $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL,
144 $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ
146 * .. External Subroutines ..
147 EXTERNAL DLASQ3, DLASRT, XERBLA
149 * .. External Functions ..
151 DOUBLE PRECISION DLAMCH
152 EXTERNAL DLAMCH, ILAENV
154 * .. Intrinsic Functions ..
155 INTRINSIC ABS, DBLE, MAX, MIN, SQRT
157 * .. Executable Statements ..
159 * Test the input arguments.
160 * (in case DLASQ2 is not called by DLASQ1)
163 EPS = DLAMCH( 'Precision' )
164 SAFMIN = DLAMCH( 'Safe minimum' )
170 CALL XERBLA( 'DLASQ2', 1 )
172 ELSE IF( N.EQ.0 ) THEN
174 ELSE IF( N.EQ.1 ) THEN
178 IF( Z( 1 ).LT.ZERO ) THEN
180 CALL XERBLA( 'DLASQ2', 2 )
183 ELSE IF( N.EQ.2 ) THEN
187 IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN
189 CALL XERBLA( 'DLASQ2', 2 )
191 ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN
196 Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
197 IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
198 T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) )
199 S = Z( 3 )*( Z( 2 ) / T )
201 S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
203 S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
205 T = Z( 1 ) + ( S+Z( 2 ) )
206 Z( 3 ) = Z( 3 )*( Z( 1 ) / T )
210 Z( 6 ) = Z( 2 ) + Z( 1 )
214 * Check for negative data and compute sums of q's and e's.
223 DO 10 K = 1, 2*( N-1 ), 2
224 IF( Z( K ).LT.ZERO ) THEN
226 CALL XERBLA( 'DLASQ2', 2 )
228 ELSE IF( Z( K+1 ).LT.ZERO ) THEN
230 CALL XERBLA( 'DLASQ2', 2 )
235 QMAX = MAX( QMAX, Z( K ) )
236 EMIN = MIN( EMIN, Z( K+1 ) )
237 ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) )
239 IF( Z( 2*N-1 ).LT.ZERO ) THEN
240 INFO = -( 200+2*N-1 )
241 CALL XERBLA( 'DLASQ2', 2 )
245 QMAX = MAX( QMAX, Z( 2*N-1 ) )
246 ZMAX = MAX( QMAX, ZMAX )
248 * Check for diagonality.
254 CALL DLASRT( 'D', N, Z, IINFO )
261 * Check for zero data.
263 IF( TRACE.EQ.ZERO ) THEN
268 * Check whether the machine is IEEE conformable.
270 IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
271 $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1
273 * Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
279 Z( 2*K-3 ) = Z( K-1 )
285 * Reverse the qd-array, if warranted.
287 IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN
289 DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4
291 Z( I4-3 ) = Z( IPN4-I4-3 )
292 Z( IPN4-I4-3 ) = TEMP
294 Z( I4-1 ) = Z( IPN4-I4-5 )
295 Z( IPN4-I4-5 ) = TEMP
299 * Initial split checking via dqd and Li's test.
306 DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4
307 IF( Z( I4-1 ).LE.TOL2*D ) THEN
311 D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) )
315 * dqd maps Z to ZZ plus Li's test.
317 EMIN = Z( 4*I0+PP+1 )
319 DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4
320 Z( I4-2*PP-2 ) = D + Z( I4-1 )
321 IF( Z( I4-1 ).LE.TOL2*D ) THEN
326 ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND.
327 $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN
328 TEMP = Z( I4+1 ) / Z( I4-2*PP-2 )
329 Z( I4-2*PP ) = Z( I4-1 )*TEMP
332 Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) )
333 D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
335 EMIN = MIN( EMIN, Z( I4-2*PP ) )
341 QMAX = Z( 4*I0-PP-2 )
342 DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4
343 QMAX = MAX( QMAX, Z( I4 ) )
346 * Prepare for the next iteration on K.
351 * Initialise variables to pass to DLASQ3.
366 DO 160 IWHILA = 1, N + 1
370 * While array unfinished do
372 * E(N0) holds the value of SIGMA when submatrix in I0:N0
373 * splits from the rest of the array, but is negated.
381 IF( SIGMA.LT.ZERO ) THEN
386 * Find last unreduced submatrix's top index I0, find QMAX and
387 * EMIN. Find Gershgorin-type bound if Q's much greater than E's.
391 EMIN = ABS( Z( 4*N0-5 ) )
397 DO 90 I4 = 4*N0, 8, -4
398 IF( Z( I4-5 ).LE.ZERO )
400 IF( QMIN.GE.FOUR*EMAX ) THEN
401 QMIN = MIN( QMIN, Z( I4-3 ) )
402 EMAX = MAX( EMAX, Z( I4-5 ) )
404 QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
405 EMIN = MIN( EMIN, Z( I4-5 ) )
413 IF( N0-I0.GT.1 ) THEN
417 DO 110 I4 = 4*I0+1, 4*N0-3, 4
418 DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) )
419 IF( DEE.LE.DEEMIN ) THEN
424 IF( (KMIN-I0)*2.LT.N0-KMIN .AND.
425 $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN
428 DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4
430 Z( I4-3 ) = Z( IPN4-I4-3 )
431 Z( IPN4-I4-3 ) = TEMP
433 Z( I4-2 ) = Z( IPN4-I4-2 )
434 Z( IPN4-I4-2 ) = TEMP
436 Z( I4-1 ) = Z( IPN4-I4-5 )
437 Z( IPN4-I4-5 ) = TEMP
439 Z( I4 ) = Z( IPN4-I4-4 )
440 Z( IPN4-I4-4 ) = TEMP
445 * Put -(initial shift) into DMIN.
447 DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
449 * Now I0:N0 is unreduced.
450 * PP = 0 for ping, PP = 1 for pong.
451 * PP = 2 indicates that flipping was applied to the Z array and
452 * and that the tests for deflation upon entry in DLASQ3
453 * should not be performed.
455 NBIG = 100*( N0-I0+1 )
456 DO 140 IWHILB = 1, NBIG
460 * While submatrix unfinished take a good dqds step.
462 CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
463 $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
468 * When EMIN is very small check for splits.
470 IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN
471 IF( Z( 4*N0 ).LE.TOL2*QMAX .OR.
472 $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN
477 DO 130 I4 = 4*I0, 4*( N0-3 ), 4
478 IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR.
479 $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN
486 QMAX = MAX( QMAX, Z( I4+1 ) )
487 EMIN = MIN( EMIN, Z( I4-1 ) )
488 OLDEMN = MIN( OLDEMN, Z( I4 ) )
501 * Maximum number of iterations exceeded, restore the shift
502 * SIGMA and place the new d's and e's in a qd array.
503 * This might need to be done for several blocks
509 Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA
512 Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 ))
514 Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 )
517 * Prepare to do this on the previous block if there is one
521 DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) )
529 Z( 2*K-1 ) = Z( 4*K-3 )
531 * Only the block 1..N0 is unfinished. The rest of the e's
532 * must be essentially zero, although sometimes other data
533 * has been stored in them.
536 Z( 2*K ) = Z( 4*K-1 )
556 * Move q's to the front.
562 * Sort and compute sum of eigenvalues.
564 CALL DLASRT( 'D', N, Z, IINFO )
571 * Store trace, sum(eigenvalues) and information on performance.
575 Z( 2*N+3 ) = DBLE( ITER )
576 Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )
577 Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER )