SRC/cgbtrf.f

   1 *> \brief \b CGBTRF
   2 *
   3 *  =========== DOCUMENTATION ===========
   4 *
   5 * Online html documentation available at
   6 *            http://www.netlib.org/lapack/explore-html/
   7 *
   8 *> \htmlonly
   9 *> Download CGBTRF + dependencies
  10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgbtrf.f">
  11 *> [TGZ]</a>
  12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgbtrf.f">
  13 *> [ZIP]</a>
  14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgbtrf.f">
  15 *> [TXT]</a>
  16 *> \endhtmlonly
  17 *
  18 *  Definition:
  19 *  ===========
  20 *
  21 *       SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  22 *
  23 *       .. Scalar Arguments ..
  24 *       INTEGER            INFO, KL, KU, LDAB, M, N
  25 *       ..
  26 *       .. Array Arguments ..
  27 *       INTEGER            IPIV( * )
  28 *       COMPLEX            AB( LDAB, * )
  29 *       ..
  30 *
  31 *
  32 *> \par Purpose:
  33 *  =============
  34 *>
  35 *> \verbatim
  36 *>
  37 *> CGBTRF computes an LU factorization of a complex m-by-n band matrix A
  38 *> using partial pivoting with row interchanges.
  39 *>
  40 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
  41 *> \endverbatim
  42 *
  43 *  Arguments:
  44 *  ==========
  45 *
  46 *> \param[in] M
  47 *> \verbatim
  48 *>          M is INTEGER
  49 *>          The number of rows of the matrix A.  M >= 0.
  50 *> \endverbatim
  51 *>
  52 *> \param[in] N
  53 *> \verbatim
  54 *>          N is INTEGER
  55 *>          The number of columns of the matrix A.  N >= 0.
  56 *> \endverbatim
  57 *>
  58 *> \param[in] KL
  59 *> \verbatim
  60 *>          KL is INTEGER
  61 *>          The number of subdiagonals within the band of A.  KL >= 0.
  62 *> \endverbatim
  63 *>
  64 *> \param[in] KU
  65 *> \verbatim
  66 *>          KU is INTEGER
  67 *>          The number of superdiagonals within the band of A.  KU >= 0.
  68 *> \endverbatim
  69 *>
  70 *> \param[in,out] AB
  71 *> \verbatim
  72 *>          AB is COMPLEX array, dimension (LDAB,N)
  73 *>          On entry, the matrix A in band storage, in rows KL+1 to
  74 *>          2*KL+KU+1; rows 1 to KL of the array need not be set.
  75 *>          The j-th column of A is stored in the j-th column of the
  76 *>          array AB as follows:
  77 *>          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
  78 *>
  79 *>          On exit, details of the factorization: U is stored as an
  80 *>          upper triangular band matrix with KL+KU superdiagonals in
  81 *>          rows 1 to KL+KU+1, and the multipliers used during the
  82 *>          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
  83 *>          See below for further details.
  84 *> \endverbatim
  85 *>
  86 *> \param[in] LDAB
  87 *> \verbatim
  88 *>          LDAB is INTEGER
  89 *>          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
  90 *> \endverbatim
  91 *>
  92 *> \param[out] IPIV
  93 *> \verbatim
  94 *>          IPIV is INTEGER array, dimension (min(M,N))
  95 *>          The pivot indices; for 1 <= i <= min(M,N), row i of the
  96 *>          matrix was interchanged with row IPIV(i).
  97 *> \endverbatim
  98 *>
  99 *> \param[out] INFO
 100 *> \verbatim
 101 *>          INFO is INTEGER
 102 *>          = 0: successful exit
 103 *>          < 0: if INFO = -i, the i-th argument had an illegal value
 104 *>          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
 105 *>               has been completed, but the factor U is exactly
 106 *>               singular, and division by zero will occur if it is used
 107 *>               to solve a system of equations.
 108 *> \endverbatim
 109 *
 110 *  Authors:
 111 *  ========
 112 *
 113 *> \author Univ. of Tennessee
 114 *> \author Univ. of California Berkeley
 115 *> \author Univ. of Colorado Denver
 116 *> \author NAG Ltd.
 117 *
 118 *> \date November 2011
 119 *
 120 *> \ingroup complexGBcomputational
 121 *
 122 *> \par Further Details:
 123 *  =====================
 124 *>
 125 *> \verbatim
 126 *>
 127 *>  The band storage scheme is illustrated by the following example, when
 128 *>  M = N = 6, KL = 2, KU = 1:
 129 *>
 130 *>  On entry:                       On exit:
 131 *>
 132 *>      *    *    *    +    +    +       *    *    *   u14  u25  u36
 133 *>      *    *    +    +    +    +       *    *   u13  u24  u35  u46
 134 *>      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
 135 *>     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
 136 *>     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
 137 *>     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
 138 *>
 139 *>  Array elements marked * are not used by the routine; elements marked
 140 *>  + need not be set on entry, but are required by the routine to store
 141 *>  elements of U because of fill-in resulting from the row interchanges.
 142 *> \endverbatim
 143 *>
 144 *  =====================================================================
 145       SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
 146 *
 147 *  -- LAPACK computational routine (version 3.4.0) --
 148 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 149 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 150 *     November 2011
 151 *
 152 *     .. Scalar Arguments ..
 153       INTEGER            INFO, KL, KU, LDAB, M, N
 154 *     ..
 155 *     .. Array Arguments ..
 156       INTEGER            IPIV( * )
 157       COMPLEX            AB( LDAB, * )
 158 *     ..
 159 *
 160 *  =====================================================================
 161 *
 162 *     .. Parameters ..
 163       COMPLEX            ONE, ZERO
 164       PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
 165      $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
 166       INTEGER            NBMAX, LDWORK
 167       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
 168 *     ..
 169 *     .. Local Scalars ..
 170       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
 171      $                   JU, K2, KM, KV, NB, NW
 172       COMPLEX            TEMP
 173 *     ..
 174 *     .. Local Arrays ..
 175       COMPLEX            WORK13( LDWORK, NBMAX ),
 176      $                   WORK31( LDWORK, NBMAX )
 177 *     ..
 178 *     .. External Functions ..
 179       INTEGER            ICAMAX, ILAENV
 180       EXTERNAL           ICAMAX, ILAENV
 181 *     ..
 182 *     .. External Subroutines ..
 183       EXTERNAL           CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
 184      $                   CSWAP, CTRSM, XERBLA
 185 *     ..
 186 *     .. Intrinsic Functions ..
 187       INTRINSIC          MAX, MIN
 188 *     ..
 189 *     .. Executable Statements ..
 190 *
 191 *     KV is the number of superdiagonals in the factor U, allowing for
 192 *     fill-in
 193 *
 194       KV = KU + KL
 195 *
 196 *     Test the input parameters.
 197 *
 198       INFO = 0
 199       IF( M.LT.0 ) THEN
 200          INFO = -1
 201       ELSE IF( N.LT.0 ) THEN
 202          INFO = -2
 203       ELSE IF( KL.LT.0 ) THEN
 204          INFO = -3
 205       ELSE IF( KU.LT.0 ) THEN
 206          INFO = -4
 207       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
 208          INFO = -6
 209       END IF
 210       IF( INFO.NE.0 ) THEN
 211          CALL XERBLA( 'CGBTRF', -INFO )
 212          RETURN
 213       END IF
 214 *
 215 *     Quick return if possible
 216 *
 217       IF( M.EQ.0 .OR. N.EQ.0 )
 218      $   RETURN
 219 *
 220 *     Determine the block size for this environment
 221 *
 222       NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
 223 *
 224 *     The block size must not exceed the limit set by the size of the
 225 *     local arrays WORK13 and WORK31.
 226 *
 227       NB = MIN( NB, NBMAX )
 228 *
 229       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
 230 *
 231 *        Use unblocked code
 232 *
 233          CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
 234       ELSE
 235 *
 236 *        Use blocked code
 237 *
 238 *        Zero the superdiagonal elements of the work array WORK13
 239 *
 240          DO 20 J = 1, NB
 241             DO 10 I = 1, J - 1
 242                WORK13( I, J ) = ZERO
 243    10       CONTINUE
 244    20    CONTINUE
 245 *
 246 *        Zero the subdiagonal elements of the work array WORK31
 247 *
 248          DO 40 J = 1, NB
 249             DO 30 I = J + 1, NB
 250                WORK31( I, J ) = ZERO
 251    30       CONTINUE
 252    40    CONTINUE
 253 *
 254 *        Gaussian elimination with partial pivoting
 255 *
 256 *        Set fill-in elements in columns KU+2 to KV to zero
 257 *
 258          DO 60 J = KU + 2, MIN( KV, N )
 259             DO 50 I = KV - J + 2, KL
 260                AB( I, J ) = ZERO
 261    50       CONTINUE
 262    60    CONTINUE
 263 *
 264 *        JU is the index of the last column affected by the current
 265 *        stage of the factorization
 266 *
 267          JU = 1
 268 *
 269          DO 180 J = 1, MIN( M, N ), NB
 270             JB = MIN( NB, MIN( M, N )-J+1 )
 271 *
 272 *           The active part of the matrix is partitioned
 273 *
 274 *              A11   A12   A13
 275 *              A21   A22   A23
 276 *              A31   A32   A33
 277 *
 278 *           Here A11, A21 and A31 denote the current block of JB columns
 279 *           which is about to be factorized. The number of rows in the
 280 *           partitioning are JB, I2, I3 respectively, and the numbers
 281 *           of columns are JB, J2, J3. The superdiagonal elements of A13
 282 *           and the subdiagonal elements of A31 lie outside the band.
 283 *
 284             I2 = MIN( KL-JB, M-J-JB+1 )
 285             I3 = MIN( JB, M-J-KL+1 )
 286 *
 287 *           J2 and J3 are computed after JU has been updated.
 288 *
 289 *           Factorize the current block of JB columns
 290 *
 291             DO 80 JJ = J, J + JB - 1
 292 *
 293 *              Set fill-in elements in column JJ+KV to zero
 294 *
 295                IF( JJ+KV.LE.N ) THEN
 296                   DO 70 I = 1, KL
 297                      AB( I, JJ+KV ) = ZERO
 298    70             CONTINUE
 299                END IF
 300 *
 301 *              Find pivot and test for singularity. KM is the number of
 302 *              subdiagonal elements in the current column.
 303 *
 304                KM = MIN( KL, M-JJ )
 305                JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
 306                IPIV( JJ ) = JP + JJ - J
 307                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
 308                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
 309                   IF( JP.NE.1 ) THEN
 310 *
 311 *                    Apply interchange to columns J to J+JB-1
 312 *
 313                      IF( JP+JJ-1.LT.J+KL ) THEN
 314 *
 315                         CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
 316      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
 317                      ELSE
 318 *
 319 *                       The interchange affects columns J to JJ-1 of A31
 320 *                       which are stored in the work array WORK31
 321 *
 322                         CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
 323      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
 324                         CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
 325      $                              AB( KV+JP, JJ ), LDAB-1 )
 326                      END IF
 327                   END IF
 328 *
 329 *                 Compute multipliers
 330 *
 331                   CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
 332      $                        1 )
 333 *
 334 *                 Update trailing submatrix within the band and within
 335 *                 the current block. JM is the index of the last column
 336 *                 which needs to be updated.
 337 *
 338                   JM = MIN( JU, J+JB-1 )
 339                   IF( JM.GT.JJ )
 340      $               CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
 341      $                           AB( KV, JJ+1 ), LDAB-1,
 342      $                           AB( KV+1, JJ+1 ), LDAB-1 )
 343                ELSE
 344 *
 345 *                 If pivot is zero, set INFO to the index of the pivot
 346 *                 unless a zero pivot has already been found.
 347 *
 348                   IF( INFO.EQ.0 )
 349      $               INFO = JJ
 350                END IF
 351 *
 352 *              Copy current column of A31 into the work array WORK31
 353 *
 354                NW = MIN( JJ-J+1, I3 )
 355                IF( NW.GT.0 )
 356      $            CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
 357      $                        WORK31( 1, JJ-J+1 ), 1 )
 358    80       CONTINUE
 359             IF( J+JB.LE.N ) THEN
 360 *
 361 *              Apply the row interchanges to the other blocks.
 362 *
 363                J2 = MIN( JU-J+1, KV ) - JB
 364                J3 = MAX( 0, JU-J-KV+1 )
 365 *
 366 *              Use CLASWP to apply the row interchanges to A12, A22, and
 367 *              A32.
 368 *
 369                CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
 370      $                      IPIV( J ), 1 )
 371 *
 372 *              Adjust the pivot indices.
 373 *
 374                DO 90 I = J, J + JB - 1
 375                   IPIV( I ) = IPIV( I ) + J - 1
 376    90          CONTINUE
 377 *
 378 *              Apply the row interchanges to A13, A23, and A33
 379 *              columnwise.
 380 *
 381                K2 = J - 1 + JB + J2
 382                DO 110 I = 1, J3
 383                   JJ = K2 + I
 384                   DO 100 II = J + I - 1, J + JB - 1
 385                      IP = IPIV( II )
 386                      IF( IP.NE.II ) THEN
 387                         TEMP = AB( KV+1+II-JJ, JJ )
 388                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
 389                         AB( KV+1+IP-JJ, JJ ) = TEMP
 390                      END IF
 391   100             CONTINUE
 392   110          CONTINUE
 393 *
 394 *              Update the relevant part of the trailing submatrix
 395 *
 396                IF( J2.GT.0 ) THEN
 397 *
 398 *                 Update A12
 399 *
 400                   CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
 401      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
 402      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
 403 *
 404                   IF( I2.GT.0 ) THEN
 405 *
 406 *                    Update A22
 407 *
 408                      CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
 409      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
 410      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
 411      $                           AB( KV+1, J+JB ), LDAB-1 )
 412                   END IF
 413 *
 414                   IF( I3.GT.0 ) THEN
 415 *
 416 *                    Update A32
 417 *
 418                      CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
 419      $                           JB, -ONE, WORK31, LDWORK,
 420      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
 421      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
 422                   END IF
 423                END IF
 424 *
 425                IF( J3.GT.0 ) THEN
 426 *
 427 *                 Copy the lower triangle of A13 into the work array
 428 *                 WORK13
 429 *
 430                   DO 130 JJ = 1, J3
 431                      DO 120 II = JJ, JB
 432                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
 433   120                CONTINUE
 434   130             CONTINUE
 435 *
 436 *                 Update A13 in the work array
 437 *
 438                   CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
 439      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
 440      $                        WORK13, LDWORK )
 441 *
 442                   IF( I2.GT.0 ) THEN
 443 *
 444 *                    Update A23
 445 *
 446                      CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
 447      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
 448      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
 449      $                           LDAB-1 )
 450                   END IF
 451 *
 452                   IF( I3.GT.0 ) THEN
 453 *
 454 *                    Update A33
 455 *
 456                      CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
 457      $                           JB, -ONE, WORK31, LDWORK, WORK13,
 458      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
 459                   END IF
 460 *
 461 *                 Copy the lower triangle of A13 back into place
 462 *
 463                   DO 150 JJ = 1, J3
 464                      DO 140 II = JJ, JB
 465                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
 466   140                CONTINUE
 467   150             CONTINUE
 468                END IF
 469             ELSE
 470 *
 471 *              Adjust the pivot indices.
 472 *
 473                DO 160 I = J, J + JB - 1
 474                   IPIV( I ) = IPIV( I ) + J - 1
 475   160          CONTINUE
 476             END IF
 477 *
 478 *           Partially undo the interchanges in the current block to
 479 *           restore the upper triangular form of A31 and copy the upper
 480 *           triangle of A31 back into place
 481 *
 482             DO 170 JJ = J + JB - 1, J, -1
 483                JP = IPIV( JJ ) - JJ + 1
 484                IF( JP.NE.1 ) THEN
 485 *
 486 *                 Apply interchange to columns J to JJ-1
 487 *
 488                   IF( JP+JJ-1.LT.J+KL ) THEN
 489 *
 490 *                    The interchange does not affect A31
 491 *
 492                      CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
 493      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
 494                   ELSE
 495 *
 496 *                    The interchange does affect A31
 497 *
 498                      CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
 499      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
 500                   END IF
 501                END IF
 502 *
 503 *              Copy the current column of A31 back into place
 504 *
 505                NW = MIN( I3, JJ-J+1 )
 506                IF( NW.GT.0 )
 507      $            CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
 508      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
 509   170       CONTINUE
 510   180    CONTINUE
 511       END IF
 512 *
 513       RETURN
 514 *
 515 *     End of CGBTRF
 516 *
 517       END