SRC/claqr2.f

   1       SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
   2      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
   3      $                   NV, WV, LDWV, WORK, LWORK )
   4 *
   5 *  -- LAPACK auxiliary routine (version 3.2) --
   6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
   7 *     November 2006
   8 *
   9 *     .. Scalar Arguments ..
  10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
  11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
  12       LOGICAL            WANTT, WANTZ
  13 *     ..
  14 *     .. Array Arguments ..
  15       COMPLEX            H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
  16      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
  17 *     ..
  18 *
  19 *     This subroutine is identical to CLAQR3 except that it avoids
  20 *     recursion by calling CLAHQR instead of CLAQR4.
  21 *
  22 *
  23 *     ******************************************************************
  24 *     Aggressive early deflation:
  25 *
  26 *     This subroutine accepts as input an upper Hessenberg matrix
  27 *     H and performs an unitary similarity transformation
  28 *     designed to detect and deflate fully converged eigenvalues from
  29 *     a trailing principal submatrix.  On output H has been over-
  30 *     written by a new Hessenberg matrix that is a perturbation of
  31 *     an unitary similarity transformation of H.  It is to be
  32 *     hoped that the final version of H has many zero subdiagonal
  33 *     entries.
  34 *
  35 *     ******************************************************************
  36 *     WANTT   (input) LOGICAL
  37 *          If .TRUE., then the Hessenberg matrix H is fully updated
  38 *          so that the triangular Schur factor may be
  39 *          computed (in cooperation with the calling subroutine).
  40 *          If .FALSE., then only enough of H is updated to preserve
  41 *          the eigenvalues.
  42 *
  43 *     WANTZ   (input) LOGICAL
  44 *          If .TRUE., then the unitary matrix Z is updated so
  45 *          so that the unitary Schur factor may be computed
  46 *          (in cooperation with the calling subroutine).
  47 *          If .FALSE., then Z is not referenced.
  48 *
  49 *     N       (input) INTEGER
  50 *          The order of the matrix H and (if WANTZ is .TRUE.) the
  51 *          order of the unitary matrix Z.
  52 *
  53 *     KTOP    (input) INTEGER
  54 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
  55 *          KBOT and KTOP together determine an isolated block
  56 *          along the diagonal of the Hessenberg matrix.
  57 *
  58 *     KBOT    (input) INTEGER
  59 *          It is assumed without a check that either
  60 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
  61 *          determine an isolated block along the diagonal of the
  62 *          Hessenberg matrix.
  63 *
  64 *     NW      (input) INTEGER
  65 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
  66 *
  67 *     H       (input/output) COMPLEX array, dimension (LDH,N)
  68 *          On input the initial N-by-N section of H stores the
  69 *          Hessenberg matrix undergoing aggressive early deflation.
  70 *          On output H has been transformed by a unitary
  71 *          similarity transformation, perturbed, and the returned
  72 *          to Hessenberg form that (it is to be hoped) has some
  73 *          zero subdiagonal entries.
  74 *
  75 *     LDH     (input) integer
  76 *          Leading dimension of H just as declared in the calling
  77 *          subroutine.  N .LE. LDH
  78 *
  79 *     ILOZ    (input) INTEGER
  80 *     IHIZ    (input) INTEGER
  81 *          Specify the rows of Z to which transformations must be
  82 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
  83 *
  84 *     Z       (input/output) COMPLEX array, dimension (LDZ,N)
  85 *          IF WANTZ is .TRUE., then on output, the unitary
  86 *          similarity transformation mentioned above has been
  87 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
  88 *          If WANTZ is .FALSE., then Z is unreferenced.
  89 *
  90 *     LDZ     (input) integer
  91 *          The leading dimension of Z just as declared in the
  92 *          calling subroutine.  1 .LE. LDZ.
  93 *
  94 *     NS      (output) integer
  95 *          The number of unconverged (ie approximate) eigenvalues
  96 *          returned in SR and SI that may be used as shifts by the
  97 *          calling subroutine.
  98 *
  99 *     ND      (output) integer
 100 *          The number of converged eigenvalues uncovered by this
 101 *          subroutine.
 102 *
 103 *     SH      (output) COMPLEX array, dimension KBOT
 104 *          On output, approximate eigenvalues that may
 105 *          be used for shifts are stored in SH(KBOT-ND-NS+1)
 106 *          through SR(KBOT-ND).  Converged eigenvalues are
 107 *          stored in SH(KBOT-ND+1) through SH(KBOT).
 108 *
 109 *     V       (workspace) COMPLEX array, dimension (LDV,NW)
 110 *          An NW-by-NW work array.
 111 *
 112 *     LDV     (input) integer scalar
 113 *          The leading dimension of V just as declared in the
 114 *          calling subroutine.  NW .LE. LDV
 115 *
 116 *     NH      (input) integer scalar
 117 *          The number of columns of T.  NH.GE.NW.
 118 *
 119 *     T       (workspace) COMPLEX array, dimension (LDT,NW)
 120 *
 121 *     LDT     (input) integer
 122 *          The leading dimension of T just as declared in the
 123 *          calling subroutine.  NW .LE. LDT
 124 *
 125 *     NV      (input) integer
 126 *          The number of rows of work array WV available for
 127 *          workspace.  NV.GE.NW.
 128 *
 129 *     WV      (workspace) COMPLEX array, dimension (LDWV,NW)
 130 *
 131 *     LDWV    (input) integer
 132 *          The leading dimension of W just as declared in the
 133 *          calling subroutine.  NW .LE. LDV
 134 *
 135 *     WORK    (workspace) COMPLEX array, dimension LWORK.
 136 *          On exit, WORK(1) is set to an estimate of the optimal value
 137 *          of LWORK for the given values of N, NW, KTOP and KBOT.
 138 *
 139 *     LWORK   (input) integer
 140 *          The dimension of the work array WORK.  LWORK = 2*NW
 141 *          suffices, but greater efficiency may result from larger
 142 *          values of LWORK.
 143 *
 144 *          If LWORK = -1, then a workspace query is assumed; CLAQR2
 145 *          only estimates the optimal workspace size for the given
 146 *          values of N, NW, KTOP and KBOT.  The estimate is returned
 147 *          in WORK(1).  No error message related to LWORK is issued
 148 *          by XERBLA.  Neither H nor Z are accessed.
 149 *
 150 *     ================================================================
 151 *     Based on contributions by
 152 *        Karen Braman and Ralph Byers, Department of Mathematics,
 153 *        University of Kansas, USA
 154 *
 155 *     ================================================================
 156 *     .. Parameters ..
 157       COMPLEX            ZERO, ONE
 158       PARAMETER          ( ZERO = ( 0.0e0, 0.0e0 ),
 159      $                   ONE = ( 1.0e0, 0.0e0 ) )
 160       REAL               RZERO, RONE
 161       PARAMETER          ( RZERO = 0.0e0, RONE = 1.0e0 )
 162 *     ..
 163 *     .. Local Scalars ..
 164       COMPLEX            BETA, CDUM, S, TAU
 165       REAL               FOO, SAFMAX, SAFMIN, SMLNUM, ULP
 166       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
 167      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
 168 *     ..
 169 *     .. External Functions ..
 170       REAL               SLAMCH
 171       EXTERNAL           SLAMCH
 172 *     ..
 173 *     .. External Subroutines ..
 174       EXTERNAL           CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF,
 175      $                   CLARFG, CLASET, CTREXC, CUNMHR, SLABAD
 176 *     ..
 177 *     .. Intrinsic Functions ..
 178       INTRINSIC          ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL
 179 *     ..
 180 *     .. Statement Functions ..
 181       REAL               CABS1
 182 *     ..
 183 *     .. Statement Function definitions ..
 184       CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
 185 *     ..
 186 *     .. Executable Statements ..
 187 *
 188 *     ==== Estimate optimal workspace. ====
 189 *
 190       JW = MIN( NW, KBOT-KTOP+1 )
 191       IF( JW.LE.2 ) THEN
 192          LWKOPT = 1
 193       ELSE
 194 *
 195 *        ==== Workspace query call to CGEHRD ====
 196 *
 197          CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
 198          LWK1 = INT( WORK( 1 ) )
 199 *
 200 *        ==== Workspace query call to CUNMHR ====
 201 *
 202          CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
 203      $                WORK, -1, INFO )
 204          LWK2 = INT( WORK( 1 ) )
 205 *
 206 *        ==== Optimal workspace ====
 207 *
 208          LWKOPT = JW + MAX( LWK1, LWK2 )
 209       END IF
 210 *
 211 *     ==== Quick return in case of workspace query. ====
 212 *
 213       IF( LWORK.EQ.-1 ) THEN
 214          WORK( 1 ) = CMPLX( LWKOPT, 0 )
 215          RETURN
 216       END IF
 217 *
 218 *     ==== Nothing to do ...
 219 *     ... for an empty active block ... ====
 220       NS = 0
 221       ND = 0
 222       WORK( 1 ) = ONE
 223       IF( KTOP.GT.KBOT )
 224      $   RETURN
 225 *     ... nor for an empty deflation window. ====
 226       IF( NW.LT.1 )
 227      $   RETURN
 228 *
 229 *     ==== Machine constants ====
 230 *
 231       SAFMIN = SLAMCH( 'SAFE MINIMUM' )
 232       SAFMAX = RONE / SAFMIN
 233       CALL SLABAD( SAFMIN, SAFMAX )
 234       ULP = SLAMCH( 'PRECISION' )
 235       SMLNUM = SAFMIN*( REAL( N ) / ULP )
 236 *
 237 *     ==== Setup deflation window ====
 238 *
 239       JW = MIN( NW, KBOT-KTOP+1 )
 240       KWTOP = KBOT - JW + 1
 241       IF( KWTOP.EQ.KTOP ) THEN
 242          S = ZERO
 243       ELSE
 244          S = H( KWTOP, KWTOP-1 )
 245       END IF
 246 *
 247       IF( KBOT.EQ.KWTOP ) THEN
 248 *
 249 *        ==== 1-by-1 deflation window: not much to do ====
 250 *
 251          SH( KWTOP ) = H( KWTOP, KWTOP )
 252          NS = 1
 253          ND = 0
 254          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
 255      $       KWTOP ) ) ) ) THEN
 256             NS = 0
 257             ND = 1
 258             IF( KWTOP.GT.KTOP )
 259      $         H( KWTOP, KWTOP-1 ) = ZERO
 260          END IF
 261          WORK( 1 ) = ONE
 262          RETURN
 263       END IF
 264 *
 265 *     ==== Convert to spike-triangular form.  (In case of a
 266 *     .    rare QR failure, this routine continues to do
 267 *     .    aggressive early deflation using that part of
 268 *     .    the deflation window that converged using INFQR
 269 *     .    here and there to keep track.) ====
 270 *
 271       CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
 272       CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
 273 *
 274       CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
 275       CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
 276      $             JW, V, LDV, INFQR )
 277 *
 278 *     ==== Deflation detection loop ====
 279 *
 280       NS = JW
 281       ILST = INFQR + 1
 282       DO 10 KNT = INFQR + 1, JW
 283 *
 284 *        ==== Small spike tip deflation test ====
 285 *
 286          FOO = CABS1( T( NS, NS ) )
 287          IF( FOO.EQ.RZERO )
 288      $      FOO = CABS1( S )
 289          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
 290      $        THEN
 291 *
 292 *           ==== One more converged eigenvalue ====
 293 *
 294             NS = NS - 1
 295          ELSE
 296 *
 297 *           ==== One undeflatable eigenvalue.  Move it up out of the
 298 *           .    way.   (CTREXC can not fail in this case.) ====
 299 *
 300             IFST = NS
 301             CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
 302             ILST = ILST + 1
 303          END IF
 304    10 CONTINUE
 305 *
 306 *        ==== Return to Hessenberg form ====
 307 *
 308       IF( NS.EQ.0 )
 309      $   S = ZERO
 310 *
 311       IF( NS.LT.JW ) THEN
 312 *
 313 *        ==== sorting the diagonal of T improves accuracy for
 314 *        .    graded matrices.  ====
 315 *
 316          DO 30 I = INFQR + 1, NS
 317             IFST = I
 318             DO 20 J = I + 1, NS
 319                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
 320      $            IFST = J
 321    20       CONTINUE
 322             ILST = I
 323             IF( IFST.NE.ILST )
 324      $         CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
 325    30    CONTINUE
 326       END IF
 327 *
 328 *     ==== Restore shift/eigenvalue array from T ====
 329 *
 330       DO 40 I = INFQR + 1, JW
 331          SH( KWTOP+I-1 ) = T( I, I )
 332    40 CONTINUE
 333 *
 334 *
 335       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
 336          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
 337 *
 338 *           ==== Reflect spike back into lower triangle ====
 339 *
 340             CALL CCOPY( NS, V, LDV, WORK, 1 )
 341             DO 50 I = 1, NS
 342                WORK( I ) = CONJG( WORK( I ) )
 343    50       CONTINUE
 344             BETA = WORK( 1 )
 345             CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU )
 346             WORK( 1 ) = ONE
 347 *
 348             CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
 349 *
 350             CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT,
 351      $                  WORK( JW+1 ) )
 352             CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
 353      $                  WORK( JW+1 ) )
 354             CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
 355      $                  WORK( JW+1 ) )
 356 *
 357             CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
 358      $                   LWORK-JW, INFO )
 359          END IF
 360 *
 361 *        ==== Copy updated reduced window into place ====
 362 *
 363          IF( KWTOP.GT.1 )
 364      $      H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) )
 365          CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
 366          CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
 367      $               LDH+1 )
 368 *
 369 *        ==== Accumulate orthogonal matrix in order update
 370 *        .    H and Z, if requested.  ====
 371 *
 372          IF( NS.GT.1 .AND. S.NE.ZERO )
 373      $      CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
 374      $                   WORK( JW+1 ), LWORK-JW, INFO )
 375 *
 376 *        ==== Update vertical slab in H ====
 377 *
 378          IF( WANTT ) THEN
 379             LTOP = 1
 380          ELSE
 381             LTOP = KTOP
 382          END IF
 383          DO 60 KROW = LTOP, KWTOP - 1, NV
 384             KLN = MIN( NV, KWTOP-KROW )
 385             CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
 386      $                  LDH, V, LDV, ZERO, WV, LDWV )
 387             CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
 388    60    CONTINUE
 389 *
 390 *        ==== Update horizontal slab in H ====
 391 *
 392          IF( WANTT ) THEN
 393             DO 70 KCOL = KBOT + 1, N, NH
 394                KLN = MIN( NH, N-KCOL+1 )
 395                CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
 396      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
 397                CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
 398      $                      LDH )
 399    70       CONTINUE
 400          END IF
 401 *
 402 *        ==== Update vertical slab in Z ====
 403 *
 404          IF( WANTZ ) THEN
 405             DO 80 KROW = ILOZ, IHIZ, NV
 406                KLN = MIN( NV, IHIZ-KROW+1 )
 407                CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
 408      $                     LDZ, V, LDV, ZERO, WV, LDWV )
 409                CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
 410      $                      LDZ )
 411    80       CONTINUE
 412          END IF
 413       END IF
 414 *
 415 *     ==== Return the number of deflations ... ====
 416 *
 417       ND = JW - NS
 418 *
 419 *     ==== ... and the number of shifts. (Subtracting
 420 *     .    INFQR from the spike length takes care
 421 *     .    of the case of a rare QR failure while
 422 *     .    calculating eigenvalues of the deflation
 423 *     .    window.)  ====
 424 *
 425       NS = NS - INFQR
 426 *
 427 *      ==== Return optimal workspace. ====
 428 *
 429       WORK( 1 ) = CMPLX( LWKOPT, 0 )
 430 *
 431 *     ==== End of CLAQR2 ====
 432 *
 433       END