1 *> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download SLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f">
21 * SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23 * LDU, NV, WV, LDWV, NH, WH, LDWH )
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
30 * .. Array Arguments ..
31 * REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
42 *> SLAQR5, called by SLAQR0, performs a
43 *> single small-bulge multi-shift QR sweep.
51 *> WANTT is logical scalar
52 *> WANTT = .true. if the quasi-triangular Schur factor
53 *> is being computed. WANTT is set to .false. otherwise.
58 *> WANTZ is logical scalar
59 *> WANTZ = .true. if the orthogonal Schur factor is being
60 *> computed. WANTZ is set to .false. otherwise.
65 *> KACC22 is integer with value 0, 1, or 2.
66 *> Specifies the computation mode of far-from-diagonal
67 *> orthogonal updates.
68 *> = 0: SLAQR5 does not accumulate reflections and does not
69 *> use matrix-matrix multiply to update far-from-diagonal
71 *> = 1: SLAQR5 accumulates reflections and uses matrix-matrix
72 *> multiply to update the far-from-diagonal matrix entries.
73 *> = 2: SLAQR5 accumulates reflections, uses matrix-matrix
74 *> multiply to update the far-from-diagonal matrix entries,
75 *> and takes advantage of 2-by-2 block structure during
81 *> N is integer scalar
82 *> N is the order of the Hessenberg matrix H upon which this
83 *> subroutine operates.
88 *> KTOP is integer scalar
93 *> KBOT is integer scalar
94 *> These are the first and last rows and columns of an
95 *> isolated diagonal block upon which the QR sweep is to be
96 *> applied. It is assumed without a check that
97 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
99 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
104 *> NSHFTS is integer scalar
105 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
106 *> must be positive and even.
111 *> SR is REAL array of size (NSHFTS)
116 *> SI is REAL array of size (NSHFTS)
117 *> SR contains the real parts and SI contains the imaginary
118 *> parts of the NSHFTS shifts of origin that define the
119 *> multi-shift QR sweep. On output SR and SI may be
125 *> H is REAL array of size (LDH,N)
126 *> On input H contains a Hessenberg matrix. On output a
127 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128 *> to the isolated diagonal block in rows and columns KTOP
134 *> LDH is integer scalar
135 *> LDH is the leading dimension of H just as declared in the
136 *> calling procedure. LDH.GE.MAX(1,N).
147 *> Specify the rows of Z to which transformations must be
148 *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
153 *> Z is REAL array of size (LDZ,IHIZ)
154 *> If WANTZ = .TRUE., then the QR Sweep orthogonal
155 *> similarity transformation is accumulated into
156 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
157 *> If WANTZ = .FALSE., then Z is unreferenced.
162 *> LDZ is integer scalar
163 *> LDA is the leading dimension of Z just as declared in
164 *> the calling procedure. LDZ.GE.N.
169 *> V is REAL array of size (LDV,NSHFTS/2)
174 *> LDV is integer scalar
175 *> LDV is the leading dimension of V as declared in the
176 *> calling procedure. LDV.GE.3.
181 *> U is REAL array of size
187 *> LDU is integer scalar
188 *> LDU is the leading dimension of U just as declared in the
189 *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
194 *> NH is integer scalar
195 *> NH is the number of columns in array WH available for
196 *> workspace. NH.GE.1.
201 *> WH is REAL array of size (LDWH,NH)
206 *> LDWH is integer scalar
207 *> Leading dimension of WH just as declared in the
208 *> calling procedure. LDWH.GE.3*NSHFTS-3.
213 *> NV is integer scalar
214 *> NV is the number of rows in WV agailable for workspace.
220 *> WV is REAL array of size
226 *> LDWV is integer scalar
227 *> LDWV is the leading dimension of WV as declared in the
228 *> in the calling subroutine. LDWV.GE.NV.
234 *> \author Univ. of Tennessee
235 *> \author Univ. of California Berkeley
236 *> \author Univ. of Colorado Denver
241 *> \ingroup realOTHERauxiliary
243 *> \par Contributors:
246 *> Karen Braman and Ralph Byers, Department of Mathematics,
247 *> University of Kansas, USA
252 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
257 * =====================================================================
258 SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
259 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
260 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
262 * -- LAPACK auxiliary routine (version 3.6.1) --
263 * -- LAPACK is a software package provided by Univ. of Tennessee, --
264 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
267 * .. Scalar Arguments ..
268 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
269 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
272 * .. Array Arguments ..
273 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
274 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
278 * ================================================================
281 PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 )
283 * .. Local Scalars ..
284 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
285 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
287 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
288 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
289 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
291 LOGICAL ACCUM, BLK22, BMP22
293 * .. External Functions ..
297 * .. Intrinsic Functions ..
299 INTRINSIC ABS, MAX, MIN, MOD, REAL
304 * .. External Subroutines ..
305 EXTERNAL SGEMM, SLABAD, SLACPY, SLAQR1, SLARFG, SLASET,
308 * .. Executable Statements ..
310 * ==== If there are no shifts, then there is nothing to do. ====
315 * ==== If the active block is empty or 1-by-1, then there
316 * . is nothing to do. ====
321 * ==== Shuffle shifts into pairs of real shifts and pairs
322 * . of complex conjugate shifts assuming complex
323 * . conjugate shifts are already adjacent to one
326 DO 10 I = 1, NSHFTS - 2, 2
327 IF( SI( I ).NE.-SI( I+1 ) ) THEN
331 SR( I+1 ) = SR( I+2 )
336 SI( I+1 ) = SI( I+2 )
341 * ==== NSHFTS is supposed to be even, but if it is odd,
342 * . then simply reduce it by one. The shuffle above
343 * . ensures that the dropped shift is real and that
344 * . the remaining shifts are paired. ====
346 NS = NSHFTS - MOD( NSHFTS, 2 )
348 * ==== Machine constants for deflation ====
350 SAFMIN = SLAMCH( 'SAFE MINIMUM' )
351 SAFMAX = ONE / SAFMIN
352 CALL SLABAD( SAFMIN, SAFMAX )
353 ULP = SLAMCH( 'PRECISION' )
354 SMLNUM = SAFMIN*( REAL( N ) / ULP )
356 * ==== Use accumulated reflections to update far-from-diagonal
359 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
361 * ==== If so, exploit the 2-by-2 block structure? ====
363 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
365 * ==== clear trash ====
368 $ H( KTOP+2, KTOP ) = ZERO
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
374 * ==== KDU = width of slab ====
378 * ==== Create and chase chains of NBMPS bulges ====
380 DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
383 $ CALL SLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
385 * ==== Near-the-diagonal bulge chase. The following loop
386 * . performs the near-the-diagonal part of a small bulge
387 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
388 * . chunk extends from column INCOL to column NDCOL
389 * . (including both column INCOL and column NDCOL). The
390 * . following loop chases a 3*NBMPS column long chain of
391 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
392 * . may be less than KTOP and and NDCOL may be greater than
393 * . KBOT indicating phantom columns from which to chase
394 * . bulges before they are actually introduced or to which
395 * . to chase bulges beyond column KBOT.) ====
397 DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
399 * ==== Bulges number MTOP to MBOT are active double implicit
400 * . shift bulges. There may or may not also be small
401 * . 2-by-2 bulge, if there is room. The inactive bulges
402 * . (if any) must wait until the active bulges have moved
403 * . down the diagonal to make room. The phantom matrix
404 * . paradigm described above helps keep track. ====
406 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
407 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
409 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
412 * ==== Generate reflections to chase the chain right
413 * . one column. (The minimum value of K is KTOP-1.) ====
416 K = KRCOL + 3*( M-1 )
417 IF( K.EQ.KTOP-1 ) THEN
418 CALL SLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
419 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
422 CALL SLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
425 V( 2, M ) = H( K+2, K )
426 V( 3, M ) = H( K+3, K )
427 CALL SLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
429 * ==== A Bulge may collapse because of vigilant
430 * . deflation or destructive underflow. In the
431 * . underflow case, try the two-small-subdiagonals
432 * . trick to try to reinflate the bulge. ====
434 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
435 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
437 * ==== Typical case: not collapsed (yet). ====
444 * ==== Atypical case: collapsed. Attempt to
445 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
446 * . If the fill resulting from the new
447 * . reflector is too large, then abandon it.
448 * . Otherwise, use the new one. ====
450 CALL SLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
451 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
454 CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
455 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
458 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
459 $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
460 $ ( ABS( H( K, K ) )+ABS( H( K+1,
461 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
463 * ==== Starting a new bulge here would
464 * . create non-negligible fill. Use
465 * . the old one with trepidation. ====
472 * ==== Stating a new bulge here would
473 * . create only negligible fill.
474 * . Replace the old reflector with
475 * . the new one. ====
477 H( K+1, K ) = H( K+1, K ) - REFSUM
488 * ==== Generate a 2-by-2 reflection, if needed. ====
490 K = KRCOL + 3*( M22-1 )
492 IF( K.EQ.KTOP-1 ) THEN
493 CALL SLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
494 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
497 CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
500 V( 2, M22 ) = H( K+2, K )
501 CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
507 * ==== Multiply H by reflections from the left ====
510 JBOT = MIN( NDCOL, KBOT )
511 ELSE IF( WANTT ) THEN
516 DO 40 J = MAX( KTOP, KRCOL ), JBOT
517 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
519 K = KRCOL + 3*( M-1 )
520 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
521 $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
522 H( K+1, J ) = H( K+1, J ) - REFSUM
523 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
524 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
528 K = KRCOL + 3*( M22-1 )
529 DO 50 J = MAX( K+1, KTOP ), JBOT
530 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
532 H( K+1, J ) = H( K+1, J ) - REFSUM
533 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
537 * ==== Multiply H by reflections from the right.
538 * . Delay filling in the last row until the
539 * . vigilant deflation check is complete. ====
542 JTOP = MAX( KTOP, INCOL )
543 ELSE IF( WANTT ) THEN
549 IF( V( 1, M ).NE.ZERO ) THEN
550 K = KRCOL + 3*( M-1 )
551 DO 60 J = JTOP, MIN( KBOT, K+3 )
552 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
553 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
554 H( J, K+1 ) = H( J, K+1 ) - REFSUM
555 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
556 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
561 * ==== Accumulate U. (If necessary, update Z later
562 * . with with an efficient matrix-matrix
566 DO 70 J = MAX( 1, KTOP-INCOL ), KDU
567 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
568 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
569 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
570 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
571 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
573 ELSE IF( WANTZ ) THEN
575 * ==== U is not accumulated, so update Z
576 * . now by multiplying by reflections
577 * . from the right. ====
580 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
581 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
582 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
583 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
584 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
590 * ==== Special case: 2-by-2 reflection (if needed) ====
592 K = KRCOL + 3*( M22-1 )
594 IF ( V( 1, M22 ).NE.ZERO ) THEN
595 DO 100 J = JTOP, MIN( KBOT, K+3 )
596 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
598 H( J, K+1 ) = H( J, K+1 ) - REFSUM
599 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
604 DO 110 J = MAX( 1, KTOP-INCOL ), KDU
605 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
606 $ V( 2, M22 )*U( J, KMS+2 ) )
607 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
608 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*
611 ELSE IF( WANTZ ) THEN
612 DO 120 J = ILOZ, IHIZ
613 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
615 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
616 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
622 * ==== Vigilant deflation check ====
625 IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
626 $ MSTART = MSTART + 1
630 IF( KRCOL.EQ.KBOT-2 )
632 DO 130 M = MSTART, MEND
633 K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
635 * ==== The following convergence test requires that
636 * . the tradition small-compared-to-nearby-diagonals
637 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
638 * . criteria both be satisfied. The latter improves
639 * . accuracy in some examples. Falling back on an
640 * . alternate convergence criterion when TST1 or TST2
641 * . is zero (as done here) is traditional but probably
642 * . unnecessary. ====
644 IF( H( K+1, K ).NE.ZERO ) THEN
645 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
646 IF( TST1.EQ.ZERO ) THEN
648 $ TST1 = TST1 + ABS( H( K, K-1 ) )
650 $ TST1 = TST1 + ABS( H( K, K-2 ) )
652 $ TST1 = TST1 + ABS( H( K, K-3 ) )
654 $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
656 $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
658 $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
660 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
662 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
663 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
664 H11 = MAX( ABS( H( K+1, K+1 ) ),
665 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
666 H22 = MIN( ABS( H( K+1, K+1 ) ),
667 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
669 TST2 = H22*( H11 / SCL )
671 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
672 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
677 * ==== Fill in the last row of each bulge. ====
679 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
680 DO 140 M = MTOP, MEND
681 K = KRCOL + 3*( M-1 )
682 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
683 H( K+4, K+1 ) = -REFSUM
684 H( K+4, K+2 ) = -REFSUM*V( 2, M )
685 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
688 * ==== End of near-the-diagonal bulge chase. ====
692 * ==== Use U (if accumulated) to update far-from-diagonal
693 * . entries in H. If required, use U to update Z as
704 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
705 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
707 * ==== Updates not exploiting the 2-by-2 block
708 * . structure of U. K1 and NU keep track of
709 * . the location and size of U in the special
710 * . cases of introducing bulges and chasing
711 * . bulges off the bottom. In these special
712 * . cases and in case the number of shifts
713 * . is NS = 2, there is no 2-by-2 block
714 * . structure to exploit. ====
716 K1 = MAX( 1, KTOP-INCOL )
717 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
719 * ==== Horizontal Multiply ====
721 DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
722 JLEN = MIN( NH, JBOT-JCOL+1 )
723 CALL SGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
724 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
726 CALL SLACPY( 'ALL', NU, JLEN, WH, LDWH,
727 $ H( INCOL+K1, JCOL ), LDH )
730 * ==== Vertical multiply ====
732 DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
733 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
734 CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE,
735 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
736 $ LDU, ZERO, WV, LDWV )
737 CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV,
738 $ H( JROW, INCOL+K1 ), LDH )
741 * ==== Z multiply (also vertical) ====
744 DO 180 JROW = ILOZ, IHIZ, NV
745 JLEN = MIN( NV, IHIZ-JROW+1 )
746 CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE,
747 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
748 $ LDU, ZERO, WV, LDWV )
749 CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV,
750 $ Z( JROW, INCOL+K1 ), LDZ )
755 * ==== Updates exploiting U's 2-by-2 block structure.
756 * . (I2, I4, J2, J4 are the last rows and columns
757 * . of the blocks.) ====
764 * ==== KZS and KNZ deal with the band of zeros
765 * . along the diagonal of one of the triangular
768 KZS = ( J4-J2 ) - ( NS+1 )
771 * ==== Horizontal multiply ====
773 DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
774 JLEN = MIN( NH, JBOT-JCOL+1 )
776 * ==== Copy bottom of H to top+KZS of scratch ====
777 * (The first KZS rows get multiplied by zero.) ====
779 CALL SLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
780 $ LDH, WH( KZS+1, 1 ), LDWH )
782 * ==== Multiply by U21**T ====
784 CALL SLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
785 CALL STRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
786 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
789 * ==== Multiply top of H by U11**T ====
791 CALL SGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
792 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
794 * ==== Copy top of H to bottom of WH ====
796 CALL SLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
797 $ WH( I2+1, 1 ), LDWH )
799 * ==== Multiply by U21**T ====
801 CALL STRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
802 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
804 * ==== Multiply by U22 ====
806 CALL SGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
807 $ U( J2+1, I2+1 ), LDU,
808 $ H( INCOL+1+J2, JCOL ), LDH, ONE,
809 $ WH( I2+1, 1 ), LDWH )
811 * ==== Copy it back ====
813 CALL SLACPY( 'ALL', KDU, JLEN, WH, LDWH,
814 $ H( INCOL+1, JCOL ), LDH )
817 * ==== Vertical multiply ====
819 DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
820 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
822 * ==== Copy right of H to scratch (the first KZS
823 * . columns get multiplied by zero) ====
825 CALL SLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
826 $ LDH, WV( 1, 1+KZS ), LDWV )
828 * ==== Multiply by U21 ====
830 CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
831 CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
832 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
835 * ==== Multiply by U11 ====
837 CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE,
838 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
841 * ==== Copy left of H to right of scratch ====
843 CALL SLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
844 $ WV( 1, 1+I2 ), LDWV )
846 * ==== Multiply by U21 ====
848 CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
849 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
851 * ==== Multiply by U22 ====
853 CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
854 $ H( JROW, INCOL+1+J2 ), LDH,
855 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
858 * ==== Copy it back ====
860 CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV,
861 $ H( JROW, INCOL+1 ), LDH )
864 * ==== Multiply Z (also vertical) ====
867 DO 210 JROW = ILOZ, IHIZ, NV
868 JLEN = MIN( NV, IHIZ-JROW+1 )
870 * ==== Copy right of Z to left of scratch (first
871 * . KZS columns get multiplied by zero) ====
873 CALL SLACPY( 'ALL', JLEN, KNZ,
874 $ Z( JROW, INCOL+1+J2 ), LDZ,
875 $ WV( 1, 1+KZS ), LDWV )
877 * ==== Multiply by U12 ====
879 CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
881 CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
882 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
885 * ==== Multiply by U11 ====
887 CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE,
888 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
891 * ==== Copy left of Z to right of scratch ====
893 CALL SLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
894 $ LDZ, WV( 1, 1+I2 ), LDWV )
896 * ==== Multiply by U21 ====
898 CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
899 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
902 * ==== Multiply by U22 ====
904 CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
905 $ Z( JROW, INCOL+1+J2 ), LDZ,
906 $ U( J2+1, I2+1 ), LDU, ONE,
907 $ WV( 1, 1+I2 ), LDWV )
909 * ==== Copy the result back to Z ====
911 CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV,
912 $ Z( JROW, INCOL+1 ), LDZ )
919 * ==== End of SLAQR5 ====