SRC/dlaed2.f

   1 *> \brief \b DLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
   2 *
   3 *  =========== DOCUMENTATION ===========
   4 *
   5 * Online html documentation available at
   6 *            http://www.netlib.org/lapack/explore-html/
   7 *
   8 *> \htmlonly
   9 *> Download DLAED2 + dependencies
  10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
  11 *> [TGZ]</a>
  12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
  13 *> [ZIP]</a>
  14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
  15 *> [TXT]</a>
  16 *> \endhtmlonly
  17 *
  18 *  Definition:
  19 *  ===========
  20 *
  21 *       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
  22 *                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
  23 *
  24 *       .. Scalar Arguments ..
  25 *       INTEGER            INFO, K, LDQ, N, N1
  26 *       DOUBLE PRECISION   RHO
  27 *       ..
  28 *       .. Array Arguments ..
  29 *       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
  30 *      $                   INDXQ( * )
  31 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
  32 *      $                   W( * ), Z( * )
  33 *       ..
  34 *
  35 *
  36 *> \par Purpose:
  37 *  =============
  38 *>
  39 *> \verbatim
  40 *>
  41 *> DLAED2 merges the two sets of eigenvalues together into a single
  42 *> sorted set.  Then it tries to deflate the size of the problem.
  43 *> There are two ways in which deflation can occur:  when two or more
  44 *> eigenvalues are close together or if there is a tiny entry in the
  45 *> Z vector.  For each such occurrence the order of the related secular
  46 *> equation problem is reduced by one.
  47 *> \endverbatim
  48 *
  49 *  Arguments:
  50 *  ==========
  51 *
  52 *> \param[out] K
  53 *> \verbatim
  54 *>          K is INTEGER
  55 *>         The number of non-deflated eigenvalues, and the order of the
  56 *>         related secular equation. 0 <= K <=N.
  57 *> \endverbatim
  58 *>
  59 *> \param[in] N
  60 *> \verbatim
  61 *>          N is INTEGER
  62 *>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
  63 *> \endverbatim
  64 *>
  65 *> \param[in] N1
  66 *> \verbatim
  67 *>          N1 is INTEGER
  68 *>         The location of the last eigenvalue in the leading sub-matrix.
  69 *>         min(1,N) <= N1 <= N/2.
  70 *> \endverbatim
  71 *>
  72 *> \param[in,out] D
  73 *> \verbatim
  74 *>          D is DOUBLE PRECISION array, dimension (N)
  75 *>         On entry, D contains the eigenvalues of the two submatrices to
  76 *>         be combined.
  77 *>         On exit, D contains the trailing (N-K) updated eigenvalues
  78 *>         (those which were deflated) sorted into increasing order.
  79 *> \endverbatim
  80 *>
  81 *> \param[in,out] Q
  82 *> \verbatim
  83 *>          Q is DOUBLE PRECISION array, dimension (LDQ, N)
  84 *>         On entry, Q contains the eigenvectors of two submatrices in
  85 *>         the two square blocks with corners at (1,1), (N1,N1)
  86 *>         and (N1+1, N1+1), (N,N).
  87 *>         On exit, Q contains the trailing (N-K) updated eigenvectors
  88 *>         (those which were deflated) in its last N-K columns.
  89 *> \endverbatim
  90 *>
  91 *> \param[in] LDQ
  92 *> \verbatim
  93 *>          LDQ is INTEGER
  94 *>         The leading dimension of the array Q.  LDQ >= max(1,N).
  95 *> \endverbatim
  96 *>
  97 *> \param[in,out] INDXQ
  98 *> \verbatim
  99 *>          INDXQ is INTEGER array, dimension (N)
 100 *>         The permutation which separately sorts the two sub-problems
 101 *>         in D into ascending order.  Note that elements in the second
 102 *>         half of this permutation must first have N1 added to their
 103 *>         values. Destroyed on exit.
 104 *> \endverbatim
 105 *>
 106 *> \param[in,out] RHO
 107 *> \verbatim
 108 *>          RHO is DOUBLE PRECISION
 109 *>         On entry, the off-diagonal element associated with the rank-1
 110 *>         cut which originally split the two submatrices which are now
 111 *>         being recombined.
 112 *>         On exit, RHO has been modified to the value required by
 113 *>         DLAED3.
 114 *> \endverbatim
 115 *>
 116 *> \param[in] Z
 117 *> \verbatim
 118 *>          Z is DOUBLE PRECISION array, dimension (N)
 119 *>         On entry, Z contains the updating vector (the last
 120 *>         row of the first sub-eigenvector matrix and the first row of
 121 *>         the second sub-eigenvector matrix).
 122 *>         On exit, the contents of Z have been destroyed by the updating
 123 *>         process.
 124 *> \endverbatim
 125 *>
 126 *> \param[out] DLAMDA
 127 *> \verbatim
 128 *>          DLAMDA is DOUBLE PRECISION array, dimension (N)
 129 *>         A copy of the first K eigenvalues which will be used by
 130 *>         DLAED3 to form the secular equation.
 131 *> \endverbatim
 132 *>
 133 *> \param[out] W
 134 *> \verbatim
 135 *>          W is DOUBLE PRECISION array, dimension (N)
 136 *>         The first k values of the final deflation-altered z-vector
 137 *>         which will be passed to DLAED3.
 138 *> \endverbatim
 139 *>
 140 *> \param[out] Q2
 141 *> \verbatim
 142 *>          Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
 143 *>         A copy of the first K eigenvectors which will be used by
 144 *>         DLAED3 in a matrix multiply (DGEMM) to solve for the new
 145 *>         eigenvectors.
 146 *> \endverbatim
 147 *>
 148 *> \param[out] INDX
 149 *> \verbatim
 150 *>          INDX is INTEGER array, dimension (N)
 151 *>         The permutation used to sort the contents of DLAMDA into
 152 *>         ascending order.
 153 *> \endverbatim
 154 *>
 155 *> \param[out] INDXC
 156 *> \verbatim
 157 *>          INDXC is INTEGER array, dimension (N)
 158 *>         The permutation used to arrange the columns of the deflated
 159 *>         Q matrix into three groups:  the first group contains non-zero
 160 *>         elements only at and above N1, the second contains
 161 *>         non-zero elements only below N1, and the third is dense.
 162 *> \endverbatim
 163 *>
 164 *> \param[out] INDXP
 165 *> \verbatim
 166 *>          INDXP is INTEGER array, dimension (N)
 167 *>         The permutation used to place deflated values of D at the end
 168 *>         of the array.  INDXP(1:K) points to the nondeflated D-values
 169 *>         and INDXP(K+1:N) points to the deflated eigenvalues.
 170 *> \endverbatim
 171 *>
 172 *> \param[out] COLTYP
 173 *> \verbatim
 174 *>          COLTYP is INTEGER array, dimension (N)
 175 *>         During execution, a label which will indicate which of the
 176 *>         following types a column in the Q2 matrix is:
 177 *>         1 : non-zero in the upper half only;
 178 *>         2 : dense;
 179 *>         3 : non-zero in the lower half only;
 180 *>         4 : deflated.
 181 *>         On exit, COLTYP(i) is the number of columns of type i,
 182 *>         for i=1 to 4 only.
 183 *> \endverbatim
 184 *>
 185 *> \param[out] INFO
 186 *> \verbatim
 187 *>          INFO is INTEGER
 188 *>          = 0:  successful exit.
 189 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 190 *> \endverbatim
 191 *
 192 *  Authors:
 193 *  ========
 194 *
 195 *> \author Univ. of Tennessee
 196 *> \author Univ. of California Berkeley
 197 *> \author Univ. of Colorado Denver
 198 *> \author NAG Ltd.
 199 *
 200 *> \date September 2012
 201 *
 202 *> \ingroup auxOTHERcomputational
 203 *
 204 *> \par Contributors:
 205 *  ==================
 206 *>
 207 *> Jeff Rutter, Computer Science Division, University of California
 208 *> at Berkeley, USA \n
 209 *>  Modified by Francoise Tisseur, University of Tennessee
 210 *>
 211 *  =====================================================================
 212       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
 213      $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
 214 *
 215 *  -- LAPACK computational routine (version 3.4.2) --
 216 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 217 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 218 *     September 2012
 219 *
 220 *     .. Scalar Arguments ..
 221       INTEGER            INFO, K, LDQ, N, N1
 222       DOUBLE PRECISION   RHO
 223 *     ..
 224 *     .. Array Arguments ..
 225       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
 226      $                   INDXQ( * )
 227       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 228      $                   W( * ), Z( * )
 229 *     ..
 230 *
 231 *  =====================================================================
 232 *
 233 *     .. Parameters ..
 234       DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
 235       PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
 236      $                   TWO = 2.0D0, EIGHT = 8.0D0 )
 237 *     ..
 238 *     .. Local Arrays ..
 239       INTEGER            CTOT( 4 ), PSM( 4 )
 240 *     ..
 241 *     .. Local Scalars ..
 242       INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
 243      $                   N2, NJ, PJ
 244       DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
 245 *     ..
 246 *     .. External Functions ..
 247       INTEGER            IDAMAX
 248       DOUBLE PRECISION   DLAMCH, DLAPY2
 249       EXTERNAL           IDAMAX, DLAMCH, DLAPY2
 250 *     ..
 251 *     .. External Subroutines ..
 252       EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
 253 *     ..
 254 *     .. Intrinsic Functions ..
 255       INTRINSIC          ABS, MAX, MIN, SQRT
 256 *     ..
 257 *     .. Executable Statements ..
 258 *
 259 *     Test the input parameters.
 260 *
 261       INFO = 0
 262 *
 263       IF( N.LT.0 ) THEN
 264          INFO = -2
 265       ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
 266          INFO = -6
 267       ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
 268          INFO = -3
 269       END IF
 270       IF( INFO.NE.0 ) THEN
 271          CALL XERBLA( 'DLAED2', -INFO )
 272          RETURN
 273       END IF
 274 *
 275 *     Quick return if possible
 276 *
 277       IF( N.EQ.0 )
 278      $   RETURN
 279 *
 280       N2 = N - N1
 281       N1P1 = N1 + 1
 282 *
 283       IF( RHO.LT.ZERO ) THEN
 284          CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
 285       END IF
 286 *
 287 *     Normalize z so that norm(z) = 1.  Since z is the concatenation of
 288 *     two normalized vectors, norm2(z) = sqrt(2).
 289 *
 290       T = ONE / SQRT( TWO )
 291       CALL DSCAL( N, T, Z, 1 )
 292 *
 293 *     RHO = ABS( norm(z)**2 * RHO )
 294 *
 295       RHO = ABS( TWO*RHO )
 296 *
 297 *     Sort the eigenvalues into increasing order
 298 *
 299       DO 10 I = N1P1, N
 300          INDXQ( I ) = INDXQ( I ) + N1
 301    10 CONTINUE
 302 *
 303 *     re-integrate the deflated parts from the last pass
 304 *
 305       DO 20 I = 1, N
 306          DLAMDA( I ) = D( INDXQ( I ) )
 307    20 CONTINUE
 308       CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
 309       DO 30 I = 1, N
 310          INDX( I ) = INDXQ( INDXC( I ) )
 311    30 CONTINUE
 312 *
 313 *     Calculate the allowable deflation tolerance
 314 *
 315       IMAX = IDAMAX( N, Z, 1 )
 316       JMAX = IDAMAX( N, D, 1 )
 317       EPS = DLAMCH( 'Epsilon' )
 318       TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
 319 *
 320 *     If the rank-1 modifier is small enough, no more needs to be done
 321 *     except to reorganize Q so that its columns correspond with the
 322 *     elements in D.
 323 *
 324       IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
 325          K = 0
 326          IQ2 = 1
 327          DO 40 J = 1, N
 328             I = INDX( J )
 329             CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
 330             DLAMDA( J ) = D( I )
 331             IQ2 = IQ2 + N
 332    40    CONTINUE
 333          CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
 334          CALL DCOPY( N, DLAMDA, 1, D, 1 )
 335          GO TO 190
 336       END IF
 337 *
 338 *     If there are multiple eigenvalues then the problem deflates.  Here
 339 *     the number of equal eigenvalues are found.  As each equal
 340 *     eigenvalue is found, an elementary reflector is computed to rotate
 341 *     the corresponding eigensubspace so that the corresponding
 342 *     components of Z are zero in this new basis.
 343 *
 344       DO 50 I = 1, N1
 345          COLTYP( I ) = 1
 346    50 CONTINUE
 347       DO 60 I = N1P1, N
 348          COLTYP( I ) = 3
 349    60 CONTINUE
 350 *
 351 *
 352       K = 0
 353       K2 = N + 1
 354       DO 70 J = 1, N
 355          NJ = INDX( J )
 356          IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
 357 *
 358 *           Deflate due to small z component.
 359 *
 360             K2 = K2 - 1
 361             COLTYP( NJ ) = 4
 362             INDXP( K2 ) = NJ
 363             IF( J.EQ.N )
 364      $         GO TO 100
 365          ELSE
 366             PJ = NJ
 367             GO TO 80
 368          END IF
 369    70 CONTINUE
 370    80 CONTINUE
 371       J = J + 1
 372       NJ = INDX( J )
 373       IF( J.GT.N )
 374      $   GO TO 100
 375       IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
 376 *
 377 *        Deflate due to small z component.
 378 *
 379          K2 = K2 - 1
 380          COLTYP( NJ ) = 4
 381          INDXP( K2 ) = NJ
 382       ELSE
 383 *
 384 *        Check if eigenvalues are close enough to allow deflation.
 385 *
 386          S = Z( PJ )
 387          C = Z( NJ )
 388 *
 389 *        Find sqrt(a**2+b**2) without overflow or
 390 *        destructive underflow.
 391 *
 392          TAU = DLAPY2( C, S )
 393          T = D( NJ ) - D( PJ )
 394          C = C / TAU
 395          S = -S / TAU
 396          IF( ABS( T*C*S ).LE.TOL ) THEN
 397 *
 398 *           Deflation is possible.
 399 *
 400             Z( NJ ) = TAU
 401             Z( PJ ) = ZERO
 402             IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
 403      $         COLTYP( NJ ) = 2
 404             COLTYP( PJ ) = 4
 405             CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
 406             T = D( PJ )*C**2 + D( NJ )*S**2
 407             D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
 408             D( PJ ) = T
 409             K2 = K2 - 1
 410             I = 1
 411    90       CONTINUE
 412             IF( K2+I.LE.N ) THEN
 413                IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
 414                   INDXP( K2+I-1 ) = INDXP( K2+I )
 415                   INDXP( K2+I ) = PJ
 416                   I = I + 1
 417                   GO TO 90
 418                ELSE
 419                   INDXP( K2+I-1 ) = PJ
 420                END IF
 421             ELSE
 422                INDXP( K2+I-1 ) = PJ
 423             END IF
 424             PJ = NJ
 425          ELSE
 426             K = K + 1
 427             DLAMDA( K ) = D( PJ )
 428             W( K ) = Z( PJ )
 429             INDXP( K ) = PJ
 430             PJ = NJ
 431          END IF
 432       END IF
 433       GO TO 80
 434   100 CONTINUE
 435 *
 436 *     Record the last eigenvalue.
 437 *
 438       K = K + 1
 439       DLAMDA( K ) = D( PJ )
 440       W( K ) = Z( PJ )
 441       INDXP( K ) = PJ
 442 *
 443 *     Count up the total number of the various types of columns, then
 444 *     form a permutation which positions the four column types into
 445 *     four uniform groups (although one or more of these groups may be
 446 *     empty).
 447 *
 448       DO 110 J = 1, 4
 449          CTOT( J ) = 0
 450   110 CONTINUE
 451       DO 120 J = 1, N
 452          CT = COLTYP( J )
 453          CTOT( CT ) = CTOT( CT ) + 1
 454   120 CONTINUE
 455 *
 456 *     PSM(*) = Position in SubMatrix (of types 1 through 4)
 457 *
 458       PSM( 1 ) = 1
 459       PSM( 2 ) = 1 + CTOT( 1 )
 460       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
 461       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
 462       K = N - CTOT( 4 )
 463 *
 464 *     Fill out the INDXC array so that the permutation which it induces
 465 *     will place all type-1 columns first, all type-2 columns next,
 466 *     then all type-3's, and finally all type-4's.
 467 *
 468       DO 130 J = 1, N
 469          JS = INDXP( J )
 470          CT = COLTYP( JS )
 471          INDX( PSM( CT ) ) = JS
 472          INDXC( PSM( CT ) ) = J
 473          PSM( CT ) = PSM( CT ) + 1
 474   130 CONTINUE
 475 *
 476 *     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
 477 *     and Q2 respectively.  The eigenvalues/vectors which were not
 478 *     deflated go into the first K slots of DLAMDA and Q2 respectively,
 479 *     while those which were deflated go into the last N - K slots.
 480 *
 481       I = 1
 482       IQ1 = 1
 483       IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
 484       DO 140 J = 1, CTOT( 1 )
 485          JS = INDX( I )
 486          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
 487          Z( I ) = D( JS )
 488          I = I + 1
 489          IQ1 = IQ1 + N1
 490   140 CONTINUE
 491 *
 492       DO 150 J = 1, CTOT( 2 )
 493          JS = INDX( I )
 494          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
 495          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
 496          Z( I ) = D( JS )
 497          I = I + 1
 498          IQ1 = IQ1 + N1
 499          IQ2 = IQ2 + N2
 500   150 CONTINUE
 501 *
 502       DO 160 J = 1, CTOT( 3 )
 503          JS = INDX( I )
 504          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
 505          Z( I ) = D( JS )
 506          I = I + 1
 507          IQ2 = IQ2 + N2
 508   160 CONTINUE
 509 *
 510       IQ1 = IQ2
 511       DO 170 J = 1, CTOT( 4 )
 512          JS = INDX( I )
 513          CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
 514          IQ2 = IQ2 + N
 515          Z( I ) = D( JS )
 516          I = I + 1
 517   170 CONTINUE
 518 *
 519 *     The deflated eigenvalues and their corresponding vectors go back
 520 *     into the last N - K slots of D and Q respectively.
 521 *
 522       IF( K.LT.N ) THEN
 523          CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
 524      $                Q( 1, K+1 ), LDQ )
 525          CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
 526       END IF
 527 *
 528 *     Copy CTOT into COLTYP for referencing in DLAED3.
 529 *
 530       DO 180 J = 1, 4
 531          COLTYP( J ) = CTOT( J )
 532   180 CONTINUE
 533 *
 534   190 CONTINUE
 535       RETURN
 536 *
 537 *     End of DLAED2
 538 *
 539       END