SRC/sgghd3.f

   1 *> \brief \b SGGHD3
   2 *
   3 *  =========== DOCUMENTATION ===========
   4 *
   5 * Online html documentation available at
   6 *            http://www.netlib.org/lapack/explore-html/
   7 *
   8 *> \htmlonly
   9 *> Download SGGHRD + dependencies
  10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghd3.f">
  11 *> [TGZ]</a>
  12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghd3.f">
  13 *> [ZIP]</a>
  14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghd3.f">
  15 *> [TXT]</a>
  16 *> \endhtmlonly
  17 *
  18 *  Definition:
  19 *  ===========
  20 *
  21 *       SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
  22 *                          LDQ, Z, LDZ, WORK, LWORK, INFO )
  23 *
  24 *       .. Scalar Arguments ..
  25 *       CHARACTER          COMPQ, COMPZ
  26 *       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
  27 *       ..
  28 *       .. Array Arguments ..
  29 *       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
  30 *      $                   Z( LDZ, * ), WORK( * )
  31 *       ..
  32 *
  33 *
  34 *> \par Purpose:
  35 *  =============
  36 *>
  37 *> \verbatim
  38 *>
  39 *> SGGHD3 reduces a pair of real matrices (A,B) to generalized upper
  40 *> Hessenberg form using orthogonal transformations, where A is a
  41 *> general matrix and B is upper triangular.  The form of the
  42 *> generalized eigenvalue problem is
  43 *>    A*x = lambda*B*x,
  44 *> and B is typically made upper triangular by computing its QR
  45 *> factorization and moving the orthogonal matrix Q to the left side
  46 *> of the equation.
  47 *>
  48 *> This subroutine simultaneously reduces A to a Hessenberg matrix H:
  49 *>    Q**T*A*Z = H
  50 *> and transforms B to another upper triangular matrix T:
  51 *>    Q**T*B*Z = T
  52 *> in order to reduce the problem to its standard form
  53 *>    H*y = lambda*T*y
  54 *> where y = Z**T*x.
  55 *>
  56 *> The orthogonal matrices Q and Z are determined as products of Givens
  57 *> rotations.  They may either be formed explicitly, or they may be
  58 *> postmultiplied into input matrices Q1 and Z1, so that
  59 *>
  60 *>      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
  61 *>
  62 *>      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
  63 *>
  64 *> If Q1 is the orthogonal matrix from the QR factorization of B in the
  65 *> original equation A*x = lambda*B*x, then SGGHD3 reduces the original
  66 *> problem to generalized Hessenberg form.
  67 *>
  68 *> This is a blocked variant of SGGHRD, using matrix-matrix
  69 *> multiplications for parts of the computation to enhance performance.
  70 *> \endverbatim
  71 *
  72 *  Arguments:
  73 *  ==========
  74 *
  75 *> \param[in] COMPQ
  76 *> \verbatim
  77 *>          COMPQ is CHARACTER*1
  78 *>          = 'N': do not compute Q;
  79 *>          = 'I': Q is initialized to the unit matrix, and the
  80 *>                 orthogonal matrix Q is returned;
  81 *>          = 'V': Q must contain an orthogonal matrix Q1 on entry,
  82 *>                 and the product Q1*Q is returned.
  83 *> \endverbatim
  84 *>
  85 *> \param[in] COMPZ
  86 *> \verbatim
  87 *>          COMPZ is CHARACTER*1
  88 *>          = 'N': do not compute Z;
  89 *>          = 'I': Z is initialized to the unit matrix, and the
  90 *>                 orthogonal matrix Z is returned;
  91 *>          = 'V': Z must contain an orthogonal matrix Z1 on entry,
  92 *>                 and the product Z1*Z is returned.
  93 *> \endverbatim
  94 *>
  95 *> \param[in] N
  96 *> \verbatim
  97 *>          N is INTEGER
  98 *>          The order of the matrices A and B.  N >= 0.
  99 *> \endverbatim
 100 *>
 101 *> \param[in] ILO
 102 *> \verbatim
 103 *>          ILO is INTEGER
 104 *> \endverbatim
 105 *>
 106 *> \param[in] IHI
 107 *> \verbatim
 108 *>          IHI is INTEGER
 109 *>
 110 *>          ILO and IHI mark the rows and columns of A which are to be
 111 *>          reduced.  It is assumed that A is already upper triangular
 112 *>          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are
 113 *>          normally set by a previous call to SGGBAL; otherwise they
 114 *>          should be set to 1 and N respectively.
 115 *>          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
 116 *> \endverbatim
 117 *>
 118 *> \param[in,out] A
 119 *> \verbatim
 120 *>          A is REAL array, dimension (LDA, N)
 121 *>          On entry, the N-by-N general matrix to be reduced.
 122 *>          On exit, the upper triangle and the first subdiagonal of A
 123 *>          are overwritten with the upper Hessenberg matrix H, and the
 124 *>          rest is set to zero.
 125 *> \endverbatim
 126 *>
 127 *> \param[in] LDA
 128 *> \verbatim
 129 *>          LDA is INTEGER
 130 *>          The leading dimension of the array A.  LDA >= max(1,N).
 131 *> \endverbatim
 132 *>
 133 *> \param[in,out] B
 134 *> \verbatim
 135 *>          B is REAL array, dimension (LDB, N)
 136 *>          On entry, the N-by-N upper triangular matrix B.
 137 *>          On exit, the upper triangular matrix T = Q**T B Z.  The
 138 *>          elements below the diagonal are set to zero.
 139 *> \endverbatim
 140 *>
 141 *> \param[in] LDB
 142 *> \verbatim
 143 *>          LDB is INTEGER
 144 *>          The leading dimension of the array B.  LDB >= max(1,N).
 145 *> \endverbatim
 146 *>
 147 *> \param[in,out] Q
 148 *> \verbatim
 149 *>          Q is REAL array, dimension (LDQ, N)
 150 *>          On entry, if COMPQ = 'V', the orthogonal matrix Q1,
 151 *>          typically from the QR factorization of B.
 152 *>          On exit, if COMPQ='I', the orthogonal matrix Q, and if
 153 *>          COMPQ = 'V', the product Q1*Q.
 154 *>          Not referenced if COMPQ='N'.
 155 *> \endverbatim
 156 *>
 157 *> \param[in] LDQ
 158 *> \verbatim
 159 *>          LDQ is INTEGER
 160 *>          The leading dimension of the array Q.
 161 *>          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
 162 *> \endverbatim
 163 *>
 164 *> \param[in,out] Z
 165 *> \verbatim
 166 *>          Z is REAL array, dimension (LDZ, N)
 167 *>          On entry, if COMPZ = 'V', the orthogonal matrix Z1.
 168 *>          On exit, if COMPZ='I', the orthogonal matrix Z, and if
 169 *>          COMPZ = 'V', the product Z1*Z.
 170 *>          Not referenced if COMPZ='N'.
 171 *> \endverbatim
 172 *>
 173 *> \param[in] LDZ
 174 *> \verbatim
 175 *>          LDZ is INTEGER
 176 *>          The leading dimension of the array Z.
 177 *>          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
 178 *> \endverbatim
 179 *>
 180 *> \param[out] WORK
 181 *> \verbatim
 182 *>          WORK is REAL array, dimension (LWORK)
 183 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 184 *> \endverbatim
 185 *>
 186 *> \param[in]  LWORK
 187 *> \verbatim
 188 *>          LWORK is INTEGER
 189 *>          The length of the array WORK.  LWORK >= 1.
 190 *>          For optimum performance LWORK >= 6*N*NB, where NB is the
 191 *>          optimal blocksize.
 192 *>
 193 *>          If LWORK = -1, then a workspace query is assumed; the routine
 194 *>          only calculates the optimal size of the WORK array, returns
 195 *>          this value as the first entry of the WORK array, and no error
 196 *>          message related to LWORK is issued by XERBLA.
 197 *> \endverbatim
 198 *>
 199 *> \param[out] INFO
 200 *> \verbatim
 201 *>          INFO is INTEGER
 202 *>          = 0:  successful exit.
 203 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
 204 *> \endverbatim
 205 *
 206 *  Authors:
 207 *  ========
 208 *
 209 *> \author Univ. of Tennessee
 210 *> \author Univ. of California Berkeley
 211 *> \author Univ. of Colorado Denver
 212 *> \author NAG Ltd.
 213 *
 214 *> \date January 2015
 215 *
 216 *> \ingroup realOTHERcomputational
 217 *
 218 *> \par Further Details:
 219 *  =====================
 220 *>
 221 *> \verbatim
 222 *>
 223 *>  This routine reduces A to Hessenberg form and maintains B in
 224 *>  using a blocked variant of Moler and Stewart's original algorithm,
 225 *>  as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
 226 *>  (BIT 2008).
 227 *> \endverbatim
 228 *>
 229 *  =====================================================================
 230       SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
 231      $                   LDQ, Z, LDZ, WORK, LWORK, INFO )
 232 *
 233 *  -- LAPACK computational routine (version 3.6.1) --
 234 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 235 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 236 *     January 2015
 237 *
 238       IMPLICIT NONE
 239 *
 240 *     .. Scalar Arguments ..
 241       CHARACTER          COMPQ, COMPZ
 242       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
 243 *     ..
 244 *     .. Array Arguments ..
 245       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
 246      $                   Z( LDZ, * ), WORK( * )
 247 *     ..
 248 *
 249 *  =====================================================================
 250 *
 251 *     .. Parameters ..
 252       REAL               ZERO, ONE
 253       PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
 254 *     ..
 255 *     .. Local Scalars ..
 256       LOGICAL            BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
 257       CHARACTER*1        COMPQ2, COMPZ2
 258       INTEGER            COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
 259      $                   KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
 260      $                   NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
 261       REAL               C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
 262 *     ..
 263 *     .. External Functions ..
 264       LOGICAL            LSAME
 265       INTEGER            ILAENV
 266       EXTERNAL           ILAENV, LSAME
 267 *     ..
 268 *     .. External Subroutines ..
 269       EXTERNAL           SGGHRD, SLARTG, SLASET, SORM22, SROT, XERBLA
 270 *     ..
 271 *     .. Intrinsic Functions ..
 272       INTRINSIC          REAL, MAX
 273 *     ..
 274 *     .. Executable Statements ..
 275 *
 276 *     Decode and test the input parameters.
 277 *
 278       INFO = 0
 279       NB = ILAENV( 1, 'SGGHD3', ' ', N, ILO, IHI, -1 )
 280       LWKOPT = MAX( 6*N*NB, 1 )
 281       WORK( 1 ) = REAL( LWKOPT )
 282       INITQ = LSAME( COMPQ, 'I' )
 283       WANTQ = INITQ .OR. LSAME( COMPQ, 'V' )
 284       INITZ = LSAME( COMPZ, 'I' )
 285       WANTZ = INITZ .OR. LSAME( COMPZ, 'V' )
 286       LQUERY = ( LWORK.EQ.-1 )
 287 *
 288       IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
 289          INFO = -1
 290       ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN
 291          INFO = -2
 292       ELSE IF( N.LT.0 ) THEN
 293          INFO = -3
 294       ELSE IF( ILO.LT.1 ) THEN
 295          INFO = -4
 296       ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
 297          INFO = -5
 298       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
 299          INFO = -7
 300       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
 301          INFO = -9
 302       ELSE IF( ( WANTQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN
 303          INFO = -11
 304       ELSE IF( ( WANTZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN
 305          INFO = -13
 306       ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
 307          INFO = -15
 308       END IF
 309       IF( INFO.NE.0 ) THEN
 310          CALL XERBLA( 'SGGHD3', -INFO )
 311          RETURN
 312       ELSE IF( LQUERY ) THEN
 313          RETURN
 314       END IF
 315 *
 316 *     Initialize Q and Z if desired.
 317 *
 318       IF( INITQ )
 319      $   CALL SLASET( 'All', N, N, ZERO, ONE, Q, LDQ )
 320       IF( INITZ )
 321      $   CALL SLASET( 'All', N, N, ZERO, ONE, Z, LDZ )
 322 *
 323 *     Zero out lower triangle of B.
 324 *
 325       IF( N.GT.1 )
 326      $   CALL SLASET( 'Lower', N-1, N-1, ZERO, ZERO, B(2, 1), LDB )
 327 *
 328 *     Quick return if possible
 329 *
 330       NH = IHI - ILO + 1
 331       IF( NH.LE.1 ) THEN
 332          WORK( 1 ) = ONE
 333          RETURN
 334       END IF
 335 *
 336 *     Determine the blocksize.
 337 *
 338       NBMIN = ILAENV( 2, 'SGGHD3', ' ', N, ILO, IHI, -1 )
 339       IF( NB.GT.1 .AND. NB.LT.NH ) THEN
 340 *
 341 *        Determine when to use unblocked instead of blocked code.
 342 *
 343          NX = MAX( NB, ILAENV( 3, 'SGGHD3', ' ', N, ILO, IHI, -1 ) )
 344          IF( NX.LT.NH ) THEN
 345 *
 346 *           Determine if workspace is large enough for blocked code.
 347 *
 348             IF( LWORK.LT.LWKOPT ) THEN
 349 *
 350 *              Not enough workspace to use optimal NB:  determine the
 351 *              minimum value of NB, and reduce NB or force use of
 352 *              unblocked code.
 353 *
 354                NBMIN = MAX( 2, ILAENV( 2, 'SGGHD3', ' ', N, ILO, IHI,
 355      $                 -1 ) )
 356                IF( LWORK.GE.6*N*NBMIN ) THEN
 357                   NB = LWORK / ( 6*N )
 358                ELSE
 359                   NB = 1
 360                END IF
 361             END IF
 362          END IF
 363       END IF
 364 *
 365       IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
 366 *
 367 *        Use unblocked code below
 368 *
 369          JCOL = ILO
 370 *
 371       ELSE
 372 *
 373 *        Use blocked code
 374 *
 375          KACC22 = ILAENV( 16, 'SGGHD3', ' ', N, ILO, IHI, -1 )
 376          BLK22 = KACC22.EQ.2
 377          DO JCOL = ILO, IHI-2, NB
 378             NNB = MIN( NB, IHI-JCOL-1 )
 379 *
 380 *           Initialize small orthogonal factors that will hold the
 381 *           accumulated Givens rotations in workspace.
 382 *           N2NB   denotes the number of 2*NNB-by-2*NNB factors
 383 *           NBLST  denotes the (possibly smaller) order of the last
 384 *                  factor.
 385 *
 386             N2NB = ( IHI-JCOL-1 ) / NNB - 1
 387             NBLST = IHI - JCOL - N2NB*NNB
 388             CALL SLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK, NBLST )
 389             PW = NBLST * NBLST + 1
 390             DO I = 1, N2NB
 391                CALL SLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE,
 392      $                      WORK( PW ), 2*NNB )
 393                PW = PW + 4*NNB*NNB
 394             END DO
 395 *
 396 *           Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
 397 *
 398             DO J = JCOL, JCOL+NNB-1
 399 *
 400 *              Reduce Jth column of A. Store cosines and sines in Jth
 401 *              column of A and B, respectively.
 402 *
 403                DO I = IHI, J+2, -1
 404                   TEMP = A( I-1, J )
 405                   CALL SLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) )
 406                   A( I, J ) = C
 407                   B( I, J ) = S
 408                END DO
 409 *
 410 *              Accumulate Givens rotations into workspace array.
 411 *
 412                PPW  = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1
 413                LEN  = 2 + J - JCOL
 414                JROW = J + N2NB*NNB + 2
 415                DO I = IHI, JROW, -1
 416                   C = A( I, J )
 417                   S = B( I, J )
 418                   DO JJ = PPW, PPW+LEN-1
 419                      TEMP = WORK( JJ + NBLST )
 420                      WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ )
 421                      WORK( JJ ) = S*TEMP + C*WORK( JJ )
 422                   END DO
 423                   LEN = LEN + 1
 424                   PPW = PPW - NBLST - 1
 425                END DO
 426 *
 427                PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB
 428                J0 = JROW - NNB
 429                DO JROW = J0, J+2, -NNB
 430                   PPW = PPWO
 431                   LEN  = 2 + J - JCOL
 432                   DO I = JROW+NNB-1, JROW, -1
 433                      C = A( I, J )
 434                      S = B( I, J )
 435                      DO JJ = PPW, PPW+LEN-1
 436                         TEMP = WORK( JJ + 2*NNB )
 437                         WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ )
 438                         WORK( JJ ) = S*TEMP + C*WORK( JJ )
 439                      END DO
 440                      LEN = LEN + 1
 441                      PPW = PPW - 2*NNB - 1
 442                   END DO
 443                   PPWO = PPWO + 4*NNB*NNB
 444                END DO
 445 *
 446 *              TOP denotes the number of top rows in A and B that will
 447 *              not be updated during the next steps.
 448 *
 449                IF( JCOL.LE.2 ) THEN
 450                   TOP = 0
 451                ELSE
 452                   TOP = JCOL
 453                END IF
 454 *
 455 *              Propagate transformations through B and replace stored
 456 *              left sines/cosines by right sines/cosines.
 457 *
 458                DO JJ = N, J+1, -1
 459 *
 460 *                 Update JJth column of B.
 461 *
 462                   DO I = MIN( JJ+1, IHI ), J+2, -1
 463                      C = A( I, J )
 464                      S = B( I, J )
 465                      TEMP = B( I, JJ )
 466                      B( I, JJ ) = C*TEMP - S*B( I-1, JJ )
 467                      B( I-1, JJ ) = S*TEMP + C*B( I-1, JJ )
 468                   END DO
 469 *
 470 *                 Annihilate B( JJ+1, JJ ).
 471 *
 472                   IF( JJ.LT.IHI ) THEN
 473                      TEMP = B( JJ+1, JJ+1 )
 474                      CALL SLARTG( TEMP, B( JJ+1, JJ ), C, S,
 475      $                            B( JJ+1, JJ+1 ) )
 476                      B( JJ+1, JJ ) = ZERO
 477                      CALL SROT( JJ-TOP, B( TOP+1, JJ+1 ), 1,
 478      $                          B( TOP+1, JJ ), 1, C, S )
 479                      A( JJ+1, J ) = C
 480                      B( JJ+1, J ) = -S
 481                   END IF
 482                END DO
 483 *
 484 *              Update A by transformations from right.
 485 *              Explicit loop unrolling provides better performance
 486 *              compared to SLASR.
 487 *               CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
 488 *     $                     IHI-J, A( J+2, J ), B( J+2, J ),
 489 *     $                     A( TOP+1, J+1 ), LDA )
 490 *
 491                JJ = MOD( IHI-J-1, 3 )
 492                DO I = IHI-J-3, JJ+1, -3
 493                   C = A( J+1+I, J )
 494                   S = -B( J+1+I, J )
 495                   C1 = A( J+2+I, J )
 496                   S1 = -B( J+2+I, J )
 497                   C2 = A( J+3+I, J )
 498                   S2 = -B( J+3+I, J )
 499 *
 500                   DO K = TOP+1, IHI
 501                      TEMP = A( K, J+I  )
 502                      TEMP1 = A( K, J+I+1 )
 503                      TEMP2 = A( K, J+I+2 )
 504                      TEMP3 = A( K, J+I+3 )
 505                      A( K, J+I+3 ) = C2*TEMP3 + S2*TEMP2
 506                      TEMP2 = -S2*TEMP3 + C2*TEMP2
 507                      A( K, J+I+2 ) = C1*TEMP2 + S1*TEMP1
 508                      TEMP1 = -S1*TEMP2 + C1*TEMP1
 509                      A( K, J+I+1 ) = C*TEMP1 + S*TEMP
 510                      A( K, J+I ) = -S*TEMP1 + C*TEMP
 511                   END DO
 512                END DO
 513 *
 514                IF( JJ.GT.0 ) THEN
 515                   DO I = JJ, 1, -1
 516                      CALL SROT( IHI-TOP, A( TOP+1, J+I+1 ), 1,
 517      $                          A( TOP+1, J+I ), 1, A( J+1+I, J ),
 518      $                          -B( J+1+I, J ) )
 519                   END DO
 520                END IF
 521 *
 522 *              Update (J+1)th column of A by transformations from left.
 523 *
 524                IF ( J .LT. JCOL + NNB - 1 ) THEN
 525                   LEN  = 1 + J - JCOL
 526 *
 527 *                 Multiply with the trailing accumulated orthogonal
 528 *                 matrix, which takes the form
 529 *
 530 *                        [  U11  U12  ]
 531 *                    U = [            ],
 532 *                        [  U21  U22  ]
 533 *
 534 *                 where U21 is a LEN-by-LEN matrix and U12 is lower
 535 *                 triangular.
 536 *
 537                   JROW = IHI - NBLST + 1
 538                   CALL SGEMV( 'Transpose', NBLST, LEN, ONE, WORK,
 539      $                        NBLST, A( JROW, J+1 ), 1, ZERO,
 540      $                        WORK( PW ), 1 )
 541                   PPW = PW + LEN
 542                   DO I = JROW, JROW+NBLST-LEN-1
 543                      WORK( PPW ) = A( I, J+1 )
 544                      PPW = PPW + 1
 545                   END DO
 546                   CALL STRMV( 'Lower', 'Transpose', 'Non-unit',
 547      $                        NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST,
 548      $                        WORK( PW+LEN ), 1 )
 549                   CALL SGEMV( 'Transpose', LEN, NBLST-LEN, ONE,
 550      $                        WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST,
 551      $                        A( JROW+NBLST-LEN, J+1 ), 1, ONE,
 552      $                        WORK( PW+LEN ), 1 )
 553                   PPW = PW
 554                   DO I = JROW, JROW+NBLST-1
 555                      A( I, J+1 ) = WORK( PPW )
 556                      PPW = PPW + 1
 557                   END DO
 558 *
 559 *                 Multiply with the other accumulated orthogonal
 560 *                 matrices, which take the form
 561 *
 562 *                        [  U11  U12   0  ]
 563 *                        [                ]
 564 *                    U = [  U21  U22   0  ],
 565 *                        [                ]
 566 *                        [   0    0    I  ]
 567 *
 568 *                 where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
 569 *                 matrix, U21 is a LEN-by-LEN upper triangular matrix
 570 *                 and U12 is an NNB-by-NNB lower triangular matrix.
 571 *
 572                   PPWO = 1 + NBLST*NBLST
 573                   J0 = JROW - NNB
 574                   DO JROW = J0, JCOL+1, -NNB
 575                      PPW = PW + LEN
 576                      DO I = JROW, JROW+NNB-1
 577                         WORK( PPW ) = A( I, J+1 )
 578                         PPW = PPW + 1
 579                      END DO
 580                      PPW = PW
 581                      DO I = JROW+NNB, JROW+NNB+LEN-1
 582                         WORK( PPW ) = A( I, J+1 )
 583                         PPW = PPW + 1
 584                      END DO
 585                      CALL STRMV( 'Upper', 'Transpose', 'Non-unit', LEN,
 586      $                           WORK( PPWO + NNB ), 2*NNB, WORK( PW ),
 587      $                           1 )
 588                      CALL STRMV( 'Lower', 'Transpose', 'Non-unit', NNB,
 589      $                           WORK( PPWO + 2*LEN*NNB ),
 590      $                           2*NNB, WORK( PW + LEN ), 1 )
 591                      CALL SGEMV( 'Transpose', NNB, LEN, ONE,
 592      $                           WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1,
 593      $                           ONE, WORK( PW ), 1 )
 594                      CALL SGEMV( 'Transpose', LEN, NNB, ONE,
 595      $                           WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB,
 596      $                           A( JROW+NNB, J+1 ), 1, ONE,
 597      $                           WORK( PW+LEN ), 1 )
 598                      PPW = PW
 599                      DO I = JROW, JROW+LEN+NNB-1
 600                         A( I, J+1 ) = WORK( PPW )
 601                         PPW = PPW + 1
 602                      END DO
 603                      PPWO = PPWO + 4*NNB*NNB
 604                   END DO
 605                END IF
 606             END DO
 607 *
 608 *           Apply accumulated orthogonal matrices to A.
 609 *
 610             COLA = N - JCOL - NNB + 1
 611             J = IHI - NBLST + 1
 612             CALL SGEMM( 'Transpose', 'No Transpose', NBLST,
 613      $                  COLA, NBLST, ONE, WORK, NBLST,
 614      $                  A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ),
 615      $                  NBLST )
 616             CALL SLACPY( 'All', NBLST, COLA, WORK( PW ), NBLST,
 617      $                   A( J, JCOL+NNB ), LDA )
 618             PPWO = NBLST*NBLST + 1
 619             J0 = J - NNB
 620             DO J = J0, JCOL+1, -NNB
 621                IF ( BLK22 ) THEN
 622 *
 623 *                 Exploit the structure of
 624 *
 625 *                        [  U11  U12  ]
 626 *                    U = [            ]
 627 *                        [  U21  U22  ],
 628 *
 629 *                 where all blocks are NNB-by-NNB, U21 is upper
 630 *                 triangular and U12 is lower triangular.
 631 *
 632                   CALL SORM22( 'Left', 'Transpose', 2*NNB, COLA, NNB,
 633      $                         NNB, WORK( PPWO ), 2*NNB,
 634      $                         A( J, JCOL+NNB ), LDA, WORK( PW ),
 635      $                         LWORK-PW+1, IERR )
 636                ELSE
 637 *
 638 *                 Ignore the structure of U.
 639 *
 640                   CALL SGEMM( 'Transpose', 'No Transpose', 2*NNB,
 641      $                        COLA, 2*NNB, ONE, WORK( PPWO ), 2*NNB,
 642      $                        A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ),
 643      $                        2*NNB )
 644                   CALL SLACPY( 'All', 2*NNB, COLA, WORK( PW ), 2*NNB,
 645      $                         A( J, JCOL+NNB ), LDA )
 646                END IF
 647                PPWO = PPWO + 4*NNB*NNB
 648             END DO
 649 *
 650 *           Apply accumulated orthogonal matrices to Q.
 651 *
 652             IF( WANTQ ) THEN
 653                J = IHI - NBLST + 1
 654                IF ( INITQ ) THEN
 655                   TOPQ = MAX( 2, J - JCOL + 1 )
 656                   NH  = IHI - TOPQ + 1
 657                ELSE
 658                   TOPQ = 1
 659                   NH = N
 660                END IF
 661                CALL SGEMM( 'No Transpose', 'No Transpose', NH,
 662      $                     NBLST, NBLST, ONE, Q( TOPQ, J ), LDQ,
 663      $                     WORK, NBLST, ZERO, WORK( PW ), NH )
 664                CALL SLACPY( 'All', NH, NBLST, WORK( PW ), NH,
 665      $                      Q( TOPQ, J ), LDQ )
 666                PPWO = NBLST*NBLST + 1
 667                J0 = J - NNB
 668                DO J = J0, JCOL+1, -NNB
 669                   IF ( INITQ ) THEN
 670                      TOPQ = MAX( 2, J - JCOL + 1 )
 671                      NH  = IHI - TOPQ + 1
 672                   END IF
 673                   IF ( BLK22 ) THEN
 674 *
 675 *                    Exploit the structure of U.
 676 *
 677                      CALL SORM22( 'Right', 'No Transpose', NH, 2*NNB,
 678      $                            NNB, NNB, WORK( PPWO ), 2*NNB,
 679      $                            Q( TOPQ, J ), LDQ, WORK( PW ),
 680      $                            LWORK-PW+1, IERR )
 681                   ELSE
 682 *
 683 *                    Ignore the structure of U.
 684 *
 685                      CALL SGEMM( 'No Transpose', 'No Transpose', NH,
 686      $                           2*NNB, 2*NNB, ONE, Q( TOPQ, J ), LDQ,
 687      $                           WORK( PPWO ), 2*NNB, ZERO, WORK( PW ),
 688      $                           NH )
 689                      CALL SLACPY( 'All', NH, 2*NNB, WORK( PW ), NH,
 690      $                            Q( TOPQ, J ), LDQ )
 691                   END IF
 692                   PPWO = PPWO + 4*NNB*NNB
 693                END DO
 694             END IF
 695 *
 696 *           Accumulate right Givens rotations if required.
 697 *
 698             IF ( WANTZ .OR. TOP.GT.0 ) THEN
 699 *
 700 *              Initialize small orthogonal factors that will hold the
 701 *              accumulated Givens rotations in workspace.
 702 *
 703                CALL SLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK,
 704      $                      NBLST )
 705                PW = NBLST * NBLST + 1
 706                DO I = 1, N2NB
 707                   CALL SLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE,
 708      $                         WORK( PW ), 2*NNB )
 709                   PW = PW + 4*NNB*NNB
 710                END DO
 711 *
 712 *              Accumulate Givens rotations into workspace array.
 713 *
 714                DO J = JCOL, JCOL+NNB-1
 715                   PPW  = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1
 716                   LEN  = 2 + J - JCOL
 717                   JROW = J + N2NB*NNB + 2
 718                   DO I = IHI, JROW, -1
 719                      C = A( I, J )
 720                      A( I, J ) = ZERO
 721                      S = B( I, J )
 722                      B( I, J ) = ZERO
 723                      DO JJ = PPW, PPW+LEN-1
 724                         TEMP = WORK( JJ + NBLST )
 725                         WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ )
 726                         WORK( JJ ) = S*TEMP + C*WORK( JJ )
 727                      END DO
 728                      LEN = LEN + 1
 729                      PPW = PPW - NBLST - 1
 730                   END DO
 731 *
 732                   PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB
 733                   J0 = JROW - NNB
 734                   DO JROW = J0, J+2, -NNB
 735                      PPW = PPWO
 736                      LEN  = 2 + J - JCOL
 737                      DO I = JROW+NNB-1, JROW, -1
 738                         C = A( I, J )
 739                         A( I, J ) = ZERO
 740                         S = B( I, J )
 741                         B( I, J ) = ZERO
 742                         DO JJ = PPW, PPW+LEN-1
 743                            TEMP = WORK( JJ + 2*NNB )
 744                            WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ )
 745                            WORK( JJ ) = S*TEMP + C*WORK( JJ )
 746                         END DO
 747                         LEN = LEN + 1
 748                         PPW = PPW - 2*NNB - 1
 749                      END DO
 750                      PPWO = PPWO + 4*NNB*NNB
 751                   END DO
 752                END DO
 753             ELSE
 754 *
 755                CALL SLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO,
 756      $                      A( JCOL + 2, JCOL ), LDA )
 757                CALL SLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO,
 758      $                      B( JCOL + 2, JCOL ), LDB )
 759             END IF
 760 *
 761 *           Apply accumulated orthogonal matrices to A and B.
 762 *
 763             IF ( TOP.GT.0 ) THEN
 764                J = IHI - NBLST + 1
 765                CALL SGEMM( 'No Transpose', 'No Transpose', TOP,
 766      $                     NBLST, NBLST, ONE, A( 1, J ), LDA,
 767      $                     WORK, NBLST, ZERO, WORK( PW ), TOP )
 768                CALL SLACPY( 'All', TOP, NBLST, WORK( PW ), TOP,
 769      $                      A( 1, J ), LDA )
 770                PPWO = NBLST*NBLST + 1
 771                J0 = J - NNB
 772                DO J = J0, JCOL+1, -NNB
 773                   IF ( BLK22 ) THEN
 774 *
 775 *                    Exploit the structure of U.
 776 *
 777                      CALL SORM22( 'Right', 'No Transpose', TOP, 2*NNB,
 778      $                            NNB, NNB, WORK( PPWO ), 2*NNB,
 779      $                            A( 1, J ), LDA, WORK( PW ),
 780      $                            LWORK-PW+1, IERR )
 781                   ELSE
 782 *
 783 *                    Ignore the structure of U.
 784 *
 785                      CALL SGEMM( 'No Transpose', 'No Transpose', TOP,
 786      $                           2*NNB, 2*NNB, ONE, A( 1, J ), LDA,
 787      $                           WORK( PPWO ), 2*NNB, ZERO,
 788      $                           WORK( PW ), TOP )
 789                      CALL SLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP,
 790      $                            A( 1, J ), LDA )
 791                   END IF
 792                   PPWO = PPWO + 4*NNB*NNB
 793                END DO
 794 *
 795                J = IHI - NBLST + 1
 796                CALL SGEMM( 'No Transpose', 'No Transpose', TOP,
 797      $                     NBLST, NBLST, ONE, B( 1, J ), LDB,
 798      $                     WORK, NBLST, ZERO, WORK( PW ), TOP )
 799                CALL SLACPY( 'All', TOP, NBLST, WORK( PW ), TOP,
 800      $                      B( 1, J ), LDB )
 801                PPWO = NBLST*NBLST + 1
 802                J0 = J - NNB
 803                DO J = J0, JCOL+1, -NNB
 804                   IF ( BLK22 ) THEN
 805 *
 806 *                    Exploit the structure of U.
 807 *
 808                      CALL SORM22( 'Right', 'No Transpose', TOP, 2*NNB,
 809      $                            NNB, NNB, WORK( PPWO ), 2*NNB,
 810      $                            B( 1, J ), LDB, WORK( PW ),
 811      $                            LWORK-PW+1, IERR )
 812                   ELSE
 813 *
 814 *                    Ignore the structure of U.
 815 *
 816                      CALL SGEMM( 'No Transpose', 'No Transpose', TOP,
 817      $                           2*NNB, 2*NNB, ONE, B( 1, J ), LDB,
 818      $                           WORK( PPWO ), 2*NNB, ZERO,
 819      $                           WORK( PW ), TOP )
 820                      CALL SLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP,
 821      $                            B( 1, J ), LDB )
 822                   END IF
 823                   PPWO = PPWO + 4*NNB*NNB
 824                END DO
 825             END IF
 826 *
 827 *           Apply accumulated orthogonal matrices to Z.
 828 *
 829             IF( WANTZ ) THEN
 830                J = IHI - NBLST + 1
 831                IF ( INITQ ) THEN
 832                   TOPQ = MAX( 2, J - JCOL + 1 )
 833                   NH  = IHI - TOPQ + 1
 834                ELSE
 835                   TOPQ = 1
 836                   NH = N
 837                END IF
 838                CALL SGEMM( 'No Transpose', 'No Transpose', NH,
 839      $                     NBLST, NBLST, ONE, Z( TOPQ, J ), LDZ,
 840      $                     WORK, NBLST, ZERO, WORK( PW ), NH )
 841                CALL SLACPY( 'All', NH, NBLST, WORK( PW ), NH,
 842      $                      Z( TOPQ, J ), LDZ )
 843                PPWO = NBLST*NBLST + 1
 844                J0 = J - NNB
 845                DO J = J0, JCOL+1, -NNB
 846                      IF ( INITQ ) THEN
 847                      TOPQ = MAX( 2, J - JCOL + 1 )
 848                      NH  = IHI - TOPQ + 1
 849                   END IF
 850                   IF ( BLK22 ) THEN
 851 *
 852 *                    Exploit the structure of U.
 853 *
 854                      CALL SORM22( 'Right', 'No Transpose', NH, 2*NNB,
 855      $                            NNB, NNB, WORK( PPWO ), 2*NNB,
 856      $                            Z( TOPQ, J ), LDZ, WORK( PW ),
 857      $                            LWORK-PW+1, IERR )
 858                   ELSE
 859 *
 860 *                    Ignore the structure of U.
 861 *
 862                      CALL SGEMM( 'No Transpose', 'No Transpose', NH,
 863      $                           2*NNB, 2*NNB, ONE, Z( TOPQ, J ), LDZ,
 864      $                           WORK( PPWO ), 2*NNB, ZERO, WORK( PW ),
 865      $                           NH )
 866                      CALL SLACPY( 'All', NH, 2*NNB, WORK( PW ), NH,
 867      $                            Z( TOPQ, J ), LDZ )
 868                   END IF
 869                   PPWO = PPWO + 4*NNB*NNB
 870                END DO
 871             END IF
 872          END DO
 873       END IF
 874 *
 875 *     Use unblocked code to reduce the rest of the matrix
 876 *     Avoid re-initialization of modified Q and Z.
 877 *
 878       COMPQ2 = COMPQ
 879       COMPZ2 = COMPZ
 880       IF ( JCOL.NE.ILO ) THEN
 881          IF ( WANTQ )
 882      $      COMPQ2 = 'V'
 883          IF ( WANTZ )
 884      $      COMPZ2 = 'V'
 885       END IF
 886 *
 887       IF ( JCOL.LT.IHI )
 888      $   CALL SGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q,
 889      $                LDQ, Z, LDZ, IERR )
 890       WORK( 1 ) = REAL( LWKOPT )
 891 *
 892       RETURN
 893 *
 894 *     End of SGGHD3
 895 *
 896       END