SRC/dsgesv.f

   1 *> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)
   2 *
   3 *  =========== DOCUMENTATION ===========
   4 *
   5 * Online html documentation available at
   6 *            http://www.netlib.org/lapack/explore-html/
   7 *
   8 *> \htmlonly
   9 *> Download DSGESV + dependencies
  10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">
  11 *> [TGZ]</a>
  12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">
  13 *> [ZIP]</a>
  14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">
  15 *> [TXT]</a>
  16 *> \endhtmlonly
  17 *
  18 *  Definition:
  19 *  ===========
  20 *
  21 *       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
  22 *                          SWORK, ITER, INFO )
  23 *
  24 *       .. Scalar Arguments ..
  25 *       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
  26 *       ..
  27 *       .. Array Arguments ..
  28 *       INTEGER            IPIV( * )
  29 *       REAL               SWORK( * )
  30 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
  31 *      $                   X( LDX, * )
  32 *       ..
  33 *
  34 *
  35 *> \par Purpose:
  36 *  =============
  37 *>
  38 *> \verbatim
  39 *>
  40 *> DSGESV computes the solution to a real system of linear equations
  41 *>    A * X = B,
  42 *> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
  43 *>
  44 *> DSGESV first attempts to factorize the matrix in SINGLE PRECISION
  45 *> and use this factorization within an iterative refinement procedure
  46 *> to produce a solution with DOUBLE PRECISION normwise backward error
  47 *> quality (see below). If the approach fails the method switches to a
  48 *> DOUBLE PRECISION factorization and solve.
  49 *>
  50 *> The iterative refinement is not going to be a winning strategy if
  51 *> the ratio SINGLE PRECISION performance over DOUBLE PRECISION
  52 *> performance is too small. A reasonable strategy should take the
  53 *> number of right-hand sides and the size of the matrix into account.
  54 *> This might be done with a call to ILAENV in the future. Up to now, we
  55 *> always try iterative refinement.
  56 *>
  57 *> The iterative refinement process is stopped if
  58 *>     ITER > ITERMAX
  59 *> or for all the RHS we have:
  60 *>     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
  61 *> where
  62 *>     o ITER is the number of the current iteration in the iterative
  63 *>       refinement process
  64 *>     o RNRM is the infinity-norm of the residual
  65 *>     o XNRM is the infinity-norm of the solution
  66 *>     o ANRM is the infinity-operator-norm of the matrix A
  67 *>     o EPS is the machine epsilon returned by DLAMCH('Epsilon')
  68 *> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
  69 *> respectively.
  70 *> \endverbatim
  71 *
  72 *  Arguments:
  73 *  ==========
  74 *
  75 *> \param[in] N
  76 *> \verbatim
  77 *>          N is INTEGER
  78 *>          The number of linear equations, i.e., the order of the
  79 *>          matrix A.  N >= 0.
  80 *> \endverbatim
  81 *>
  82 *> \param[in] NRHS
  83 *> \verbatim
  84 *>          NRHS is INTEGER
  85 *>          The number of right hand sides, i.e., the number of columns
  86 *>          of the matrix B.  NRHS >= 0.
  87 *> \endverbatim
  88 *>
  89 *> \param[in,out] A
  90 *> \verbatim
  91 *>          A is DOUBLE PRECISION array,
  92 *>          dimension (LDA,N)
  93 *>          On entry, the N-by-N coefficient matrix A.
  94 *>          On exit, if iterative refinement has been successfully used
  95 *>          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
  96 *>          unchanged, if double precision factorization has been used
  97 *>          (INFO.EQ.0 and ITER.LT.0, see description below), then the
  98 *>          array A contains the factors L and U from the factorization
  99 *>          A = P*L*U; the unit diagonal elements of L are not stored.
 100 *> \endverbatim
 101 *>
 102 *> \param[in] LDA
 103 *> \verbatim
 104 *>          LDA is INTEGER
 105 *>          The leading dimension of the array A.  LDA >= max(1,N).
 106 *> \endverbatim
 107 *>
 108 *> \param[out] IPIV
 109 *> \verbatim
 110 *>          IPIV is INTEGER array, dimension (N)
 111 *>          The pivot indices that define the permutation matrix P;
 112 *>          row i of the matrix was interchanged with row IPIV(i).
 113 *>          Corresponds either to the single precision factorization
 114 *>          (if INFO.EQ.0 and ITER.GE.0) or the double precision
 115 *>          factorization (if INFO.EQ.0 and ITER.LT.0).
 116 *> \endverbatim
 117 *>
 118 *> \param[in] B
 119 *> \verbatim
 120 *>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
 121 *>          The N-by-NRHS right hand side matrix B.
 122 *> \endverbatim
 123 *>
 124 *> \param[in] LDB
 125 *> \verbatim
 126 *>          LDB is INTEGER
 127 *>          The leading dimension of the array B.  LDB >= max(1,N).
 128 *> \endverbatim
 129 *>
 130 *> \param[out] X
 131 *> \verbatim
 132 *>          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
 133 *>          If INFO = 0, the N-by-NRHS solution matrix X.
 134 *> \endverbatim
 135 *>
 136 *> \param[in] LDX
 137 *> \verbatim
 138 *>          LDX is INTEGER
 139 *>          The leading dimension of the array X.  LDX >= max(1,N).
 140 *> \endverbatim
 141 *>
 142 *> \param[out] WORK
 143 *> \verbatim
 144 *>          WORK is DOUBLE PRECISION array, dimension (N,NRHS)
 145 *>          This array is used to hold the residual vectors.
 146 *> \endverbatim
 147 *>
 148 *> \param[out] SWORK
 149 *> \verbatim
 150 *>          SWORK is REAL array, dimension (N*(N+NRHS))
 151 *>          This array is used to use the single precision matrix and the
 152 *>          right-hand sides or solutions in single precision.
 153 *> \endverbatim
 154 *>
 155 *> \param[out] ITER
 156 *> \verbatim
 157 *>          ITER is INTEGER
 158 *>          < 0: iterative refinement has failed, double precision
 159 *>               factorization has been performed
 160 *>               -1 : the routine fell back to full precision for
 161 *>                    implementation- or machine-specific reasons
 162 *>               -2 : narrowing the precision induced an overflow,
 163 *>                    the routine fell back to full precision
 164 *>               -3 : failure of SGETRF
 165 *>               -31: stop the iterative refinement after the 30th
 166 *>                    iterations
 167 *>          > 0: iterative refinement has been successfully used.
 168 *>               Returns the number of iterations
 169 *> \endverbatim
 170 *>
 171 *> \param[out] INFO
 172 *> \verbatim
 173 *>          INFO is INTEGER
 174 *>          = 0:  successful exit
 175 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 176 *>          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
 177 *>                exactly zero.  The factorization has been completed,
 178 *>                but the factor U is exactly singular, so the solution
 179 *>                could not be computed.
 180 *> \endverbatim
 181 *
 182 *  Authors:
 183 *  ========
 184 *
 185 *> \author Univ. of Tennessee
 186 *> \author Univ. of California Berkeley
 187 *> \author Univ. of Colorado Denver
 188 *> \author NAG Ltd.
 189 *
 190 *> \date June 2016
 191 *
 192 *> \ingroup doubleGEsolve
 193 *
 194 *  =====================================================================
 195       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
 196      $                   SWORK, ITER, INFO )
 197 *
 198 *  -- LAPACK driver routine (version 3.6.1) --
 199 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 200 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 201 *     June 2016
 202 *
 203 *     .. Scalar Arguments ..
 204       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
 205 *     ..
 206 *     .. Array Arguments ..
 207       INTEGER            IPIV( * )
 208       REAL               SWORK( * )
 209       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
 210      $                   X( LDX, * )
 211 *     ..
 212 *
 213 *  =====================================================================
 214 *
 215 *     .. Parameters ..
 216       LOGICAL            DOITREF
 217       PARAMETER          ( DOITREF = .TRUE. )
 218 *
 219       INTEGER            ITERMAX
 220       PARAMETER          ( ITERMAX = 30 )
 221 *
 222       DOUBLE PRECISION   BWDMAX
 223       PARAMETER          ( BWDMAX = 1.0E+00 )
 224 *
 225       DOUBLE PRECISION   NEGONE, ONE
 226       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
 227 *
 228 *     .. Local Scalars ..
 229       INTEGER            I, IITER, PTSA, PTSX
 230       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
 231 *
 232 *     .. External Subroutines ..
 233       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,
 234      $                   SGETRS, XERBLA
 235 *     ..
 236 *     .. External Functions ..
 237       INTEGER            IDAMAX
 238       DOUBLE PRECISION   DLAMCH, DLANGE
 239       EXTERNAL           IDAMAX, DLAMCH, DLANGE
 240 *     ..
 241 *     .. Intrinsic Functions ..
 242       INTRINSIC          ABS, DBLE, MAX, SQRT
 243 *     ..
 244 *     .. Executable Statements ..
 245 *
 246       INFO = 0
 247       ITER = 0
 248 *
 249 *     Test the input parameters.
 250 *
 251       IF( N.LT.0 ) THEN
 252          INFO = -1
 253       ELSE IF( NRHS.LT.0 ) THEN
 254          INFO = -2
 255       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
 256          INFO = -4
 257       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
 258          INFO = -7
 259       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
 260          INFO = -9
 261       END IF
 262       IF( INFO.NE.0 ) THEN
 263          CALL XERBLA( 'DSGESV', -INFO )
 264          RETURN
 265       END IF
 266 *
 267 *     Quick return if (N.EQ.0).
 268 *
 269       IF( N.EQ.0 )
 270      $   RETURN
 271 *
 272 *     Skip single precision iterative refinement if a priori slower
 273 *     than double precision factorization.
 274 *
 275       IF( .NOT.DOITREF ) THEN
 276          ITER = -1
 277          GO TO 40
 278       END IF
 279 *
 280 *     Compute some constants.
 281 *
 282       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
 283       EPS = DLAMCH( 'Epsilon' )
 284       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
 285 *
 286 *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
 287 *
 288       PTSA = 1
 289       PTSX = PTSA + N*N
 290 *
 291 *     Convert B from double precision to single precision and store the
 292 *     result in SX.
 293 *
 294       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
 295 *
 296       IF( INFO.NE.0 ) THEN
 297          ITER = -2
 298          GO TO 40
 299       END IF
 300 *
 301 *     Convert A from double precision to single precision and store the
 302 *     result in SA.
 303 *
 304       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
 305 *
 306       IF( INFO.NE.0 ) THEN
 307          ITER = -2
 308          GO TO 40
 309       END IF
 310 *
 311 *     Compute the LU factorization of SA.
 312 *
 313       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
 314 *
 315       IF( INFO.NE.0 ) THEN
 316          ITER = -3
 317          GO TO 40
 318       END IF
 319 *
 320 *     Solve the system SA*SX = SB.
 321 *
 322       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
 323      $             SWORK( PTSX ), N, INFO )
 324 *
 325 *     Convert SX back to double precision
 326 *
 327       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
 328 *
 329 *     Compute R = B - AX (R is WORK).
 330 *
 331       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
 332 *
 333       CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
 334      $            LDA, X, LDX, ONE, WORK, N )
 335 *
 336 *     Check whether the NRHS normwise backward errors satisfy the
 337 *     stopping criterion. If yes, set ITER=0 and return.
 338 *
 339       DO I = 1, NRHS
 340          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
 341          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
 342          IF( RNRM.GT.XNRM*CTE )
 343      $      GO TO 10
 344       END DO
 345 *
 346 *     If we are here, the NRHS normwise backward errors satisfy the
 347 *     stopping criterion. We are good to exit.
 348 *
 349       ITER = 0
 350       RETURN
 351 *
 352    10 CONTINUE
 353 *
 354       DO 30 IITER = 1, ITERMAX
 355 *
 356 *        Convert R (in WORK) from double precision to single precision
 357 *        and store the result in SX.
 358 *
 359          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
 360 *
 361          IF( INFO.NE.0 ) THEN
 362             ITER = -2
 363             GO TO 40
 364          END IF
 365 *
 366 *        Solve the system SA*SX = SR.
 367 *
 368          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
 369      $                SWORK( PTSX ), N, INFO )
 370 *
 371 *        Convert SX back to double precision and update the current
 372 *        iterate.
 373 *
 374          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
 375 *
 376          DO I = 1, NRHS
 377             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
 378          END DO
 379 *
 380 *        Compute R = B - AX (R is WORK).
 381 *
 382          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
 383 *
 384          CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
 385      $               A, LDA, X, LDX, ONE, WORK, N )
 386 *
 387 *        Check whether the NRHS normwise backward errors satisfy the
 388 *        stopping criterion. If yes, set ITER=IITER>0 and return.
 389 *
 390          DO I = 1, NRHS
 391             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
 392             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
 393             IF( RNRM.GT.XNRM*CTE )
 394      $         GO TO 20
 395          END DO
 396 *
 397 *        If we are here, the NRHS normwise backward errors satisfy the
 398 *        stopping criterion, we are good to exit.
 399 *
 400          ITER = IITER
 401 *
 402          RETURN
 403 *
 404    20    CONTINUE
 405 *
 406    30 CONTINUE
 407 *
 408 *     If we are at this place of the code, this is because we have
 409 *     performed ITER=ITERMAX iterations and never satisified the
 410 *     stopping criterion, set up the ITER flag accordingly and follow up
 411 *     on double precision routine.
 412 *
 413       ITER = -ITERMAX - 1
 414 *
 415    40 CONTINUE
 416 *
 417 *     Single-precision iterative refinement failed to converge to a
 418 *     satisfactory solution, so we resort to double precision.
 419 *
 420       CALL DGETRF( N, N, A, LDA, IPIV, INFO )
 421 *
 422       IF( INFO.NE.0 )
 423      $   RETURN
 424 *
 425       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
 426       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
 427      $             INFO )
 428 *
 429       RETURN
 430 *
 431 *     End of DSGESV.
 432 *
 433       END