1 *> \brief \b SLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general banded matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download SLA_GBRFSX_EXTENDED + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbrfsx_extended.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbrfsx_extended.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbrfsx_extended.f">
21 * SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU,
22 * NRHS, AB, LDAB, AFB, LDAFB, IPIV,
23 * COLEQU, C, B, LDB, Y, LDY,
24 * BERR_OUT, N_NORMS, ERR_BNDS_NORM,
25 * ERR_BNDS_COMP, RES, AYB, DY,
26 * Y_TAIL, RCOND, ITHRESH, RTHRESH,
27 * DZ_UB, IGNORE_CWISE, INFO )
29 * .. Scalar Arguments ..
30 * INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS,
31 * $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH
32 * LOGICAL COLEQU, IGNORE_CWISE
35 * .. Array Arguments ..
37 * REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
38 * $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*)
39 * REAL C( * ), AYB(*), RCOND, BERR_OUT(*),
40 * $ ERR_BNDS_NORM( NRHS, * ),
41 * $ ERR_BNDS_COMP( NRHS, * )
50 *> SLA_GBRFSX_EXTENDED improves the computed solution to a system of
51 *> linear equations by performing extra-precise iterative refinement
52 *> and provides error bounds and backward error estimates for the solution.
53 *> This subroutine is called by SGBRFSX to perform iterative refinement.
54 *> In addition to normwise error bound, the code provides maximum
55 *> componentwise error bound if possible. See comments for ERR_BNDS_NORM
56 *> and ERR_BNDS_COMP for details of the error bounds. Note that this
57 *> subroutine is only responsible for setting the second fields of
58 *> ERR_BNDS_NORM and ERR_BNDS_COMP.
64 *> \param[in] PREC_TYPE
66 *> PREC_TYPE is INTEGER
67 *> Specifies the intermediate precision to be used in refinement.
68 *> The value is defined by ILAPREC(P) where P is a CHARACTER and
75 *> \param[in] TRANS_TYPE
77 *> TRANS_TYPE is INTEGER
78 *> Specifies the transposition operation on A.
79 *> The value is defined by ILATRANS(T) where T is a CHARACTER and
80 *> T = 'N': No transpose
82 *> = 'C': Conjugate transpose
88 *> The number of linear equations, i.e., the order of the
95 *> The number of subdiagonals within the band of A. KL >= 0.
101 *> The number of superdiagonals within the band of A. KU >= 0.
107 *> The number of right-hand-sides, i.e., the number of columns of the
113 *> AB is REAL array, dimension (LDAB,N)
114 *> On entry, the N-by-N matrix AB.
120 *> The leading dimension of the array AB. LDAB >= KL+KU+1.
125 *> AFB is REAL array, dimension (LDAFB,N)
126 *> The factors L and U from the factorization
127 *> A = P*L*U as computed by SGBTRF.
133 *> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1.
138 *> IPIV is INTEGER array, dimension (N)
139 *> The pivot indices from the factorization A = P*L*U
140 *> as computed by SGBTRF; row i of the matrix was interchanged
147 *> If .TRUE. then column equilibration was done to A before calling
148 *> this routine. This is needed to compute the solution and error
154 *> C is REAL array, dimension (N)
155 *> The column scale factors for A. If COLEQU = .FALSE., C
156 *> is not accessed. If C is input, each element of C should be a power
157 *> of the radix to ensure a reliable solution and error estimates.
158 *> Scaling by powers of the radix does not cause rounding errors unless
159 *> the result underflows or overflows. Rounding errors during scaling
160 *> lead to refining with a matrix that is not equivalent to the
161 *> input matrix, producing error estimates that may not be
167 *> B is REAL array, dimension (LDB,NRHS)
168 *> The right-hand-side matrix B.
174 *> The leading dimension of the array B. LDB >= max(1,N).
179 *> Y is REAL array, dimension (LDY,NRHS)
180 *> On entry, the solution matrix X, as computed by SGBTRS.
181 *> On exit, the improved solution matrix Y.
187 *> The leading dimension of the array Y. LDY >= max(1,N).
190 *> \param[out] BERR_OUT
192 *> BERR_OUT is REAL array, dimension (NRHS)
193 *> On exit, BERR_OUT(j) contains the componentwise relative backward
194 *> error for right-hand-side j from the formula
195 *> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) )
196 *> where abs(Z) is the componentwise absolute value of the matrix
197 *> or vector Z. This is computed by SLA_LIN_BERR.
200 *> \param[in] N_NORMS
202 *> N_NORMS is INTEGER
203 *> Determines which error bounds to return (see ERR_BNDS_NORM
204 *> and ERR_BNDS_COMP).
205 *> If N_NORMS >= 1 return normwise error bounds.
206 *> If N_NORMS >= 2 return componentwise error bounds.
209 *> \param[in,out] ERR_BNDS_NORM
211 *> ERR_BNDS_NORM is REAL array, dimension
212 *> (NRHS, N_ERR_BNDS)
213 *> For each right-hand side, this array contains information about
214 *> various error bounds and condition numbers corresponding to the
215 *> normwise relative error, which is defined as follows:
217 *> Normwise relative error in the ith solution vector:
218 *> max_j (abs(XTRUE(j,i) - X(j,i)))
219 *> ------------------------------
222 *> The array is indexed by the type of error information as described
223 *> below. There currently are up to three pieces of information
226 *> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith
229 *> The second index in ERR_BNDS_NORM(:,err) contains the following
231 *> err = 1 "Trust/don't trust" boolean. Trust the answer if the
232 *> reciprocal condition number is greater than the threshold
233 *> sqrt(n) * slamch('Epsilon').
235 *> err = 2 "Guaranteed" error bound: The estimated forward error,
236 *> almost certainly within a factor of 10 of the true error
237 *> so long as the next entry is greater than the threshold
238 *> sqrt(n) * slamch('Epsilon'). This error bound should only
239 *> be trusted if the previous boolean is true.
241 *> err = 3 Reciprocal condition number: Estimated normwise
242 *> reciprocal condition number. Compared with the threshold
243 *> sqrt(n) * slamch('Epsilon') to determine if the error
244 *> estimate is "guaranteed". These reciprocal condition
245 *> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some
246 *> appropriately scaled matrix Z.
247 *> Let Z = S*A, where S scales each row by a power of the
248 *> radix so all absolute row sums of Z are approximately 1.
250 *> This subroutine is only responsible for setting the second field
252 *> See Lapack Working Note 165 for further details and extra
256 *> \param[in,out] ERR_BNDS_COMP
258 *> ERR_BNDS_COMP is REAL array, dimension
259 *> (NRHS, N_ERR_BNDS)
260 *> For each right-hand side, this array contains information about
261 *> various error bounds and condition numbers corresponding to the
262 *> componentwise relative error, which is defined as follows:
264 *> Componentwise relative error in the ith solution vector:
265 *> abs(XTRUE(j,i) - X(j,i))
266 *> max_j ----------------------
269 *> The array is indexed by the right-hand side i (on which the
270 *> componentwise relative error depends), and the type of error
271 *> information as described below. There currently are up to three
272 *> pieces of information returned for each right-hand side. If
273 *> componentwise accuracy is not requested (PARAMS(3) = 0.0), then
274 *> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most
275 *> the first (:,N_ERR_BNDS) entries are returned.
277 *> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
280 *> The second index in ERR_BNDS_COMP(:,err) contains the following
282 *> err = 1 "Trust/don't trust" boolean. Trust the answer if the
283 *> reciprocal condition number is greater than the threshold
284 *> sqrt(n) * slamch('Epsilon').
286 *> err = 2 "Guaranteed" error bound: The estimated forward error,
287 *> almost certainly within a factor of 10 of the true error
288 *> so long as the next entry is greater than the threshold
289 *> sqrt(n) * slamch('Epsilon'). This error bound should only
290 *> be trusted if the previous boolean is true.
292 *> err = 3 Reciprocal condition number: Estimated componentwise
293 *> reciprocal condition number. Compared with the threshold
294 *> sqrt(n) * slamch('Epsilon') to determine if the error
295 *> estimate is "guaranteed". These reciprocal condition
296 *> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some
297 *> appropriately scaled matrix Z.
298 *> Let Z = S*(A*diag(x)), where x is the solution for the
299 *> current right-hand side and S scales each row of
300 *> A*diag(x) by a power of the radix so all absolute row
301 *> sums of Z are approximately 1.
303 *> This subroutine is only responsible for setting the second field
305 *> See Lapack Working Note 165 for further details and extra
311 *> RES is REAL array, dimension (N)
312 *> Workspace to hold the intermediate residual.
317 *> AYB is REAL array, dimension (N)
318 *> Workspace. This can be the same workspace passed for Y_TAIL.
323 *> DY is REAL array, dimension (N)
324 *> Workspace to hold the intermediate solution.
329 *> Y_TAIL is REAL array, dimension (N)
330 *> Workspace to hold the trailing bits of the intermediate solution.
336 *> Reciprocal scaled condition number. This is an estimate of the
337 *> reciprocal Skeel condition number of the matrix A after
338 *> equilibration (if done). If this is less than the machine
339 *> precision (in particular, if it is zero), the matrix is singular
340 *> to working precision. Note that the error may still be small even
341 *> if this number is very small and the matrix appears ill-
345 *> \param[in] ITHRESH
347 *> ITHRESH is INTEGER
348 *> The maximum number of residual computations allowed for
349 *> refinement. The default is 10. For 'aggressive' set to 100 to
350 *> permit convergence using approximate factorizations or
351 *> factorizations other than LU. If the factorization uses a
352 *> technique other than Gaussian elimination, the guarantees in
353 *> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy.
356 *> \param[in] RTHRESH
359 *> Determines when to stop refinement if the error estimate stops
360 *> decreasing. Refinement will stop when the next solution no longer
361 *> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is
362 *> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The
363 *> default value is 0.5. For 'aggressive' set to 0.9 to permit
364 *> convergence on extremely ill-conditioned matrices. See LAWN 165
371 *> Determines when to start considering componentwise convergence.
372 *> Componentwise convergence is only considered after each component
373 *> of the solution Y is stable, which we define as the relative
374 *> change in each component being less than DZ_UB. The default value
375 *> is 0.25, requiring the first bit to be stable. See LAWN 165 for
379 *> \param[in] IGNORE_CWISE
381 *> IGNORE_CWISE is LOGICAL
382 *> If .TRUE. then ignore componentwise convergence. Default value
389 *> = 0: Successful exit.
390 *> < 0: if INFO = -i, the ith argument to SGBTRS had an illegal
397 *> \author Univ. of Tennessee
398 *> \author Univ. of California Berkeley
399 *> \author Univ. of Colorado Denver
402 *> \date September 2012
404 *> \ingroup realGBcomputational
406 * =====================================================================
407 SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU,
408 $ NRHS, AB, LDAB, AFB, LDAFB, IPIV,
409 $ COLEQU, C, B, LDB, Y, LDY,
410 $ BERR_OUT, N_NORMS, ERR_BNDS_NORM,
411 $ ERR_BNDS_COMP, RES, AYB, DY,
412 $ Y_TAIL, RCOND, ITHRESH, RTHRESH,
413 $ DZ_UB, IGNORE_CWISE, INFO )
415 * -- LAPACK computational routine (version 3.4.2) --
416 * -- LAPACK is a software package provided by Univ. of Tennessee, --
417 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
420 * .. Scalar Arguments ..
421 INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS,
422 $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH
423 LOGICAL COLEQU, IGNORE_CWISE
426 * .. Array Arguments ..
428 REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
429 $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*)
430 REAL C( * ), AYB(*), RCOND, BERR_OUT(*),
431 $ ERR_BNDS_NORM( NRHS, * ),
432 $ ERR_BNDS_COMP( NRHS, * )
435 * =====================================================================
437 * .. Local Scalars ..
439 INTEGER CNT, I, J, M, X_STATE, Z_STATE, Y_PREC_STATE
440 REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT,
441 $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX,
442 $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z,
443 $ EPS, HUGEVAL, INCR_THRESH
447 INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE,
448 $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL,
450 PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1,
451 $ CONV_STATE = 2, NOPROG_STATE = 3 )
452 PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1,
454 INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I
455 INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I
456 INTEGER CMP_ERR_I, PIV_GROWTH_I
457 PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2,
459 PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 )
460 PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8,
462 INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I,
464 PARAMETER ( LA_LINRX_ITREF_I = 1,
465 $ LA_LINRX_ITHRESH_I = 2 )
466 PARAMETER ( LA_LINRX_CWISE_I = 3 )
467 INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I,
469 PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 )
470 PARAMETER ( LA_LINRX_RCOND_I = 3 )
472 * .. External Subroutines ..
473 EXTERNAL SAXPY, SCOPY, SGBTRS, SGBMV, BLAS_SGBMV_X,
474 $ BLAS_SGBMV2_X, SLA_GBAMV, SLA_WWADDW, SLAMCH,
475 $ CHLA_TRANSTYPE, SLA_LIN_BERR
477 CHARACTER CHLA_TRANSTYPE
479 * .. Intrinsic Functions ..
480 INTRINSIC ABS, MAX, MIN
482 * .. Executable Statements ..
484 IF (INFO.NE.0) RETURN
485 TRANS = CHLA_TRANSTYPE(TRANS_TYPE)
486 EPS = SLAMCH( 'Epsilon' )
487 HUGEVAL = SLAMCH( 'Overflow' )
488 * Force HUGEVAL to Inf
489 HUGEVAL = HUGEVAL * HUGEVAL
490 * Using HUGEVAL may lead to spurious underflows.
491 INCR_THRESH = REAL( N ) * EPS
495 Y_PREC_STATE = EXTRA_RESIDUAL
496 IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN
513 X_STATE = WORKING_STATE
514 Z_STATE = UNSTABLE_STATE
519 * Compute residual RES = B_s - op(A_s) * Y,
520 * op(A) = A, A**T, or A**H depending on TRANS (and type).
522 CALL SCOPY( N, B( 1, J ), 1, RES, 1 )
523 IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN
524 CALL SGBMV( TRANS, M, N, KL, KU, -1.0, AB, LDAB,
525 $ Y( 1, J ), 1, 1.0, RES, 1 )
526 ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN
527 CALL BLAS_SGBMV_X( TRANS_TYPE, N, N, KL, KU,
528 $ -1.0, AB, LDAB, Y( 1, J ), 1, 1.0, RES, 1,
531 CALL BLAS_SGBMV2_X( TRANS_TYPE, N, N, KL, KU, -1.0,
532 $ AB, LDAB, Y( 1, J ), Y_TAIL, 1, 1.0, RES, 1,
536 ! XXX: RES is no longer needed.
537 CALL SCOPY( N, RES, 1, DY, 1 )
538 CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, DY, N,
541 * Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT.
550 YK = ABS( Y( I, J ) )
553 IF ( YK .NE. 0.0 ) THEN
554 DZ_Z = MAX( DZ_Z, DYK / YK )
555 ELSE IF ( DYK .NE. 0.0 ) THEN
559 YMIN = MIN( YMIN, YK )
561 NORMY = MAX( NORMY, YK )
564 NORMX = MAX( NORMX, YK * C( I ) )
565 NORMDX = MAX( NORMDX, DYK * C( I ) )
568 NORMDX = MAX( NORMDX, DYK )
572 IF ( NORMX .NE. 0.0 ) THEN
573 DX_X = NORMDX / NORMX
574 ELSE IF ( NORMDX .EQ. 0.0 ) THEN
580 DXRAT = NORMDX / PREVNORMDX
581 DZRAT = DZ_Z / PREV_DZ_Z
583 * Check termination criteria.
585 IF ( .NOT.IGNORE_CWISE
586 $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY
587 $ .AND. Y_PREC_STATE .LT. EXTRA_Y )
590 IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH )
591 $ X_STATE = WORKING_STATE
592 IF ( X_STATE .EQ. WORKING_STATE ) THEN
593 IF ( DX_X .LE. EPS ) THEN
595 ELSE IF ( DXRAT .GT. RTHRESH ) THEN
596 IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN
599 X_STATE = NOPROG_STATE
602 IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT
604 IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X
607 IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB )
608 $ Z_STATE = WORKING_STATE
609 IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH )
610 $ Z_STATE = WORKING_STATE
611 IF ( Z_STATE .EQ. WORKING_STATE ) THEN
612 IF ( DZ_Z .LE. EPS ) THEN
614 ELSE IF ( DZ_Z .GT. DZ_UB ) THEN
615 Z_STATE = UNSTABLE_STATE
618 ELSE IF ( DZRAT .GT. RTHRESH ) THEN
619 IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN
622 Z_STATE = NOPROG_STATE
625 IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT
627 IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z
630 * Exit if both normwise and componentwise stopped working,
631 * but if componentwise is unstable, let it go at least two
634 IF ( X_STATE.NE.WORKING_STATE ) THEN
635 IF ( IGNORE_CWISE ) GOTO 666
636 IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE )
638 IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666
641 IF ( INCR_PREC ) THEN
643 Y_PREC_STATE = Y_PREC_STATE + 1
654 IF (Y_PREC_STATE .LT. EXTRA_Y) THEN
655 CALL SAXPY( N, 1.0, DY, 1, Y(1,J), 1 )
657 CALL SLA_WWADDW( N, Y(1,J), Y_TAIL, DY )
661 * Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT.
664 * Set final_* when cnt hits ithresh.
666 IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X
667 IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z
669 * Compute error bounds.
671 IF ( N_NORMS .GE. 1 ) THEN
672 ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) =
673 $ FINAL_DX_X / (1 - DXRATMAX)
675 IF (N_NORMS .GE. 2) THEN
676 ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) =
677 $ FINAL_DZ_Z / (1 - DZRATMAX)
680 * Compute componentwise relative backward error from formula
681 * max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) )
682 * where abs(Z) is the componentwise absolute value of the matrix
685 * Compute residual RES = B_s - op(A_s) * Y,
686 * op(A) = A, A**T, or A**H depending on TRANS (and type).
688 CALL SCOPY( N, B( 1, J ), 1, RES, 1 )
689 CALL SGBMV(TRANS, N, N, KL, KU, -1.0, AB, LDAB, Y(1,J),
693 AYB( I ) = ABS( B( I, J ) )
696 * Compute abs(op(A_s))*abs(Y) + abs(B_s).
698 CALL SLA_GBAMV( TRANS_TYPE, N, N, KL, KU, 1.0,
699 $ AB, LDAB, Y(1, J), 1, 1.0, AYB, 1 )
701 CALL SLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) )
703 * End of loop for each RHS