1 SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2 $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
3 $ NV, WV, LDWV, WORK, LWORK )
5 * -- LAPACK auxiliary routine (version 3.2) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
14 * .. Array Arguments ..
15 COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
16 $ WORK( * ), WV( LDWV, * ), Z( LDZ, * )
19 * This subroutine is identical to CLAQR3 except that it avoids
20 * recursion by calling CLAHQR instead of CLAQR4.
23 * ******************************************************************
24 * Aggressive early deflation:
26 * This subroutine accepts as input an upper Hessenberg matrix
27 * H and performs an unitary similarity transformation
28 * designed to detect and deflate fully converged eigenvalues from
29 * a trailing principal submatrix. On output H has been over-
30 * written by a new Hessenberg matrix that is a perturbation of
31 * an unitary similarity transformation of H. It is to be
32 * hoped that the final version of H has many zero subdiagonal
35 * ******************************************************************
36 * WANTT (input) LOGICAL
37 * If .TRUE., then the Hessenberg matrix H is fully updated
38 * so that the triangular Schur factor may be
39 * computed (in cooperation with the calling subroutine).
40 * If .FALSE., then only enough of H is updated to preserve
43 * WANTZ (input) LOGICAL
44 * If .TRUE., then the unitary matrix Z is updated so
45 * so that the unitary Schur factor may be computed
46 * (in cooperation with the calling subroutine).
47 * If .FALSE., then Z is not referenced.
50 * The order of the matrix H and (if WANTZ is .TRUE.) the
51 * order of the unitary matrix Z.
53 * KTOP (input) INTEGER
54 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
55 * KBOT and KTOP together determine an isolated block
56 * along the diagonal of the Hessenberg matrix.
58 * KBOT (input) INTEGER
59 * It is assumed without a check that either
60 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
61 * determine an isolated block along the diagonal of the
65 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
67 * H (input/output) COMPLEX array, dimension (LDH,N)
68 * On input the initial N-by-N section of H stores the
69 * Hessenberg matrix undergoing aggressive early deflation.
70 * On output H has been transformed by a unitary
71 * similarity transformation, perturbed, and the returned
72 * to Hessenberg form that (it is to be hoped) has some
73 * zero subdiagonal entries.
76 * Leading dimension of H just as declared in the calling
77 * subroutine. N .LE. LDH
79 * ILOZ (input) INTEGER
80 * IHIZ (input) INTEGER
81 * Specify the rows of Z to which transformations must be
82 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
84 * Z (input/output) COMPLEX array, dimension (LDZ,N)
85 * IF WANTZ is .TRUE., then on output, the unitary
86 * similarity transformation mentioned above has been
87 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
88 * If WANTZ is .FALSE., then Z is unreferenced.
91 * The leading dimension of Z just as declared in the
92 * calling subroutine. 1 .LE. LDZ.
95 * The number of unconverged (ie approximate) eigenvalues
96 * returned in SR and SI that may be used as shifts by the
100 * The number of converged eigenvalues uncovered by this
103 * SH (output) COMPLEX array, dimension KBOT
104 * On output, approximate eigenvalues that may
105 * be used for shifts are stored in SH(KBOT-ND-NS+1)
106 * through SR(KBOT-ND). Converged eigenvalues are
107 * stored in SH(KBOT-ND+1) through SH(KBOT).
109 * V (workspace) COMPLEX array, dimension (LDV,NW)
110 * An NW-by-NW work array.
112 * LDV (input) integer scalar
113 * The leading dimension of V just as declared in the
114 * calling subroutine. NW .LE. LDV
116 * NH (input) integer scalar
117 * The number of columns of T. NH.GE.NW.
119 * T (workspace) COMPLEX array, dimension (LDT,NW)
121 * LDT (input) integer
122 * The leading dimension of T just as declared in the
123 * calling subroutine. NW .LE. LDT
126 * The number of rows of work array WV available for
127 * workspace. NV.GE.NW.
129 * WV (workspace) COMPLEX array, dimension (LDWV,NW)
131 * LDWV (input) integer
132 * The leading dimension of W just as declared in the
133 * calling subroutine. NW .LE. LDV
135 * WORK (workspace) COMPLEX array, dimension LWORK.
136 * On exit, WORK(1) is set to an estimate of the optimal value
137 * of LWORK for the given values of N, NW, KTOP and KBOT.
139 * LWORK (input) integer
140 * The dimension of the work array WORK. LWORK = 2*NW
141 * suffices, but greater efficiency may result from larger
144 * If LWORK = -1, then a workspace query is assumed; CLAQR2
145 * only estimates the optimal workspace size for the given
146 * values of N, NW, KTOP and KBOT. The estimate is returned
147 * in WORK(1). No error message related to LWORK is issued
148 * by XERBLA. Neither H nor Z are accessed.
150 * ================================================================
151 * Based on contributions by
152 * Karen Braman and Ralph Byers, Department of Mathematics,
153 * University of Kansas, USA
155 * ================================================================
158 PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ),
159 $ ONE = ( 1.0e0, 0.0e0 ) )
161 PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 )
163 * .. Local Scalars ..
164 COMPLEX BETA, CDUM, S, TAU
165 REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP
166 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
167 $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
169 * .. External Functions ..
173 * .. External Subroutines ..
174 EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF,
175 $ CLARFG, CLASET, CTREXC, CUNMHR, SLABAD
177 * .. Intrinsic Functions ..
178 INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL
180 * .. Statement Functions ..
183 * .. Statement Function definitions ..
184 CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
186 * .. Executable Statements ..
188 * ==== Estimate optimal workspace. ====
190 JW = MIN( NW, KBOT-KTOP+1 )
195 * ==== Workspace query call to CGEHRD ====
197 CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
198 LWK1 = INT( WORK( 1 ) )
200 * ==== Workspace query call to CUNMHR ====
202 CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
204 LWK2 = INT( WORK( 1 ) )
206 * ==== Optimal workspace ====
208 LWKOPT = JW + MAX( LWK1, LWK2 )
211 * ==== Quick return in case of workspace query. ====
213 IF( LWORK.EQ.-1 ) THEN
214 WORK( 1 ) = CMPLX( LWKOPT, 0 )
218 * ==== Nothing to do ...
219 * ... for an empty active block ... ====
225 * ... nor for an empty deflation window. ====
229 * ==== Machine constants ====
231 SAFMIN = SLAMCH( 'SAFE MINIMUM' )
232 SAFMAX = RONE / SAFMIN
233 CALL SLABAD( SAFMIN, SAFMAX )
234 ULP = SLAMCH( 'PRECISION' )
235 SMLNUM = SAFMIN*( REAL( N ) / ULP )
237 * ==== Setup deflation window ====
239 JW = MIN( NW, KBOT-KTOP+1 )
240 KWTOP = KBOT - JW + 1
241 IF( KWTOP.EQ.KTOP ) THEN
244 S = H( KWTOP, KWTOP-1 )
247 IF( KBOT.EQ.KWTOP ) THEN
249 * ==== 1-by-1 deflation window: not much to do ====
251 SH( KWTOP ) = H( KWTOP, KWTOP )
254 IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
259 $ H( KWTOP, KWTOP-1 ) = ZERO
265 * ==== Convert to spike-triangular form. (In case of a
266 * . rare QR failure, this routine continues to do
267 * . aggressive early deflation using that part of
268 * . the deflation window that converged using INFQR
269 * . here and there to keep track.) ====
271 CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
272 CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
274 CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
275 CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
276 $ JW, V, LDV, INFQR )
278 * ==== Deflation detection loop ====
282 DO 10 KNT = INFQR + 1, JW
284 * ==== Small spike tip deflation test ====
286 FOO = CABS1( T( NS, NS ) )
289 IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
292 * ==== One more converged eigenvalue ====
297 * ==== One undeflatable eigenvalue. Move it up out of the
298 * . way. (CTREXC can not fail in this case.) ====
301 CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
306 * ==== Return to Hessenberg form ====
313 * ==== sorting the diagonal of T improves accuracy for
314 * . graded matrices. ====
316 DO 30 I = INFQR + 1, NS
319 IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
324 $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
328 * ==== Restore shift/eigenvalue array from T ====
330 DO 40 I = INFQR + 1, JW
331 SH( KWTOP+I-1 ) = T( I, I )
335 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
336 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
338 * ==== Reflect spike back into lower triangle ====
340 CALL CCOPY( NS, V, LDV, WORK, 1 )
342 WORK( I ) = CONJG( WORK( I ) )
345 CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU )
348 CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
350 CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT,
352 CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
354 CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
357 CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
361 * ==== Copy updated reduced window into place ====
364 $ H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) )
365 CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
366 CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
369 * ==== Accumulate orthogonal matrix in order update
370 * . H and Z, if requested. ====
372 IF( NS.GT.1 .AND. S.NE.ZERO )
373 $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
374 $ WORK( JW+1 ), LWORK-JW, INFO )
376 * ==== Update vertical slab in H ====
383 DO 60 KROW = LTOP, KWTOP - 1, NV
384 KLN = MIN( NV, KWTOP-KROW )
385 CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
386 $ LDH, V, LDV, ZERO, WV, LDWV )
387 CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
390 * ==== Update horizontal slab in H ====
393 DO 70 KCOL = KBOT + 1, N, NH
394 KLN = MIN( NH, N-KCOL+1 )
395 CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
396 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
397 CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
402 * ==== Update vertical slab in Z ====
405 DO 80 KROW = ILOZ, IHIZ, NV
406 KLN = MIN( NV, IHIZ-KROW+1 )
407 CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
408 $ LDZ, V, LDV, ZERO, WV, LDWV )
409 CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
415 * ==== Return the number of deflations ... ====
419 * ==== ... and the number of shifts. (Subtracting
420 * . INFQR from the spike length takes care
421 * . of the case of a rare QR failure while
422 * . calculating eigenvalues of the deflation
427 * ==== Return optimal workspace. ====
429 WORK( 1 ) = CMPLX( LWKOPT, 0 )
431 * ==== End of CLAQR2 ====