SRC/chetrf_aasen.f

   1 *> \brief \b CHETRF_AASEN
   2 *
   3 *  =========== DOCUMENTATION ===========
   4 *
   5 * Online html documentation available at
   6 *            http://www.netlib.org/lapack/explore-html/
   7 *
   8 *> \htmlonly
   9 *> Download CHETRF_AASEN + dependencies
  10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aasen.f">
  11 *> [TGZ]</a>
  12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aasen.f">
  13 *> [ZIP]</a>
  14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aasen.f">
  15 *> [TXT]</a>
  16 *> \endhtmlonly
  17 *
  18 *  Definition:
  19 *  ===========
  20 *
  21 *       SUBROUTINE CHETRF_AASEN( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
  22 *
  23 *       .. Scalar Arguments ..
  24 *       CHARACTER    UPLO
  25 *       INTEGER      N, LDA, LWORK, INFO
  26 *       ..
  27 *       .. Array Arguments ..
  28 *       INTEGER      IPIV( * )
  29 *       COMPLEX   A( LDA, * ), WORK( * )
  30 *       ..
  31 *
  32 *> \par Purpose:
  33 *  =============
  34 *>
  35 *> \verbatim
  36 *>
  37 *> CHETRF_AASEN computes the factorization of a real hermitian matrix A
  38 *> using the Aasen's algorithm.  The form of the factorization is
  39 *>
  40 *>    A = U*T*U**T  or  A = L*T*L**T
  41 *>
  42 *> where U (or L) is a product of permutation and unit upper (lower)
  43 *> triangular matrices, and T is a hermitian tridiagonal matrix.
  44 *>
  45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
  46 *> \endverbatim
  47 *
  48 *  Arguments:
  49 *  ==========
  50 *
  51 *> \param[in] UPLO
  52 *> \verbatim
  53 *>          UPLO is CHARACTER*1
  54 *>          = 'U':  Upper triangle of A is stored;
  55 *>          = 'L':  Lower triangle of A is stored.
  56 *> \endverbatim
  57 *>
  58 *> \param[in] N
  59 *> \verbatim
  60 *>          N is INTEGER
  61 *>          The order of the matrix A.  N >= 0.
  62 *> \endverbatim
  63 *>
  64 *> \param[in,out] A
  65 *> \verbatim
  66 *>          A is COMPLEX array, dimension (LDA,N)
  67 *>          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
  68 *>          N-by-N upper triangular part of A contains the upper
  69 *>          triangular part of the matrix A, and the strictly lower
  70 *>          triangular part of A is not referenced.  If UPLO = 'L', the
  71 *>          leading N-by-N lower triangular part of A contains the lower
  72 *>          triangular part of the matrix A, and the strictly upper
  73 *>          triangular part of A is not referenced.
  74 *>
  75 *>          On exit, the tridiagonal matrix is stored in the diagonals
  76 *>          and the subdiagonals of A just below (or above) the diagonals,
  77 *>          and L is stored below (or above) the subdiaonals, when UPLO
  78 *>          is 'L' (or 'U').
  79 *> \endverbatim
  80 *>
  81 *> \param[in] LDA
  82 *> \verbatim
  83 *>          LDA is INTEGER
  84 *>          The leading dimension of the array A.  LDA >= max(1,N).
  85 *> \endverbatim
  86 *>
  87 *> \param[out] IPIV
  88 *> \verbatim
  89 *>          IPIV is INTEGER array, dimension (N)
  90 *>          On exit, it contains the details of the interchanges, i.e.,
  91 *>          the row and column k of A were interchanged with the
  92 *>          row and column IPIV(k).
  93 *> \endverbatim
  94 *>
  95 *> \param[out] WORK
  96 *> \verbatim
  97 *>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
  98 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  99 *> \endverbatim
 100 *>
 101 *> \param[in] LWORK
 102 *> \verbatim
 103 *>          LWORK is INTEGER
 104 *>          The length of WORK.  LWORK >= 2*N. For optimum performance
 105 *>          LWORK >= N*(1+NB), where NB is the optimal blocksize.
 106 *>
 107 *>          If LWORK = -1, then a workspace query is assumed; the routine
 108 *>          only calculates the optimal size of the WORK array, returns
 109 *>          this value as the first entry of the WORK array, and no error
 110 *>          message related to LWORK is issued by XERBLA.
 111 *> \endverbatim
 112 *>
 113 *> \param[out] INFO
 114 *> \verbatim
 115 *>          INFO is INTEGER
 116 *>          = 0:  successful exit
 117 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 118 *>          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
 119 *>                has been completed, but the block diagonal matrix D is
 120 *>                exactly singular, and division by zero will occur if it
 121 *>                is used to solve a system of equations.
 122 *> \endverbatim
 123 *
 124 *  Authors:
 125 *  ========
 126 *
 127 *> \author Univ. of Tennessee
 128 *> \author Univ. of California Berkeley
 129 *> \author Univ. of Colorado Denver
 130 *> \author NAG Ltd.
 131 *
 132 *> \date November 2016
 133 *
 134 *> \ingroup complexSYcomputational
 135 *
 136 *  @generated from zhetrf_aasen.f, fortran z -> c, Sun Oct  2 22:29:10 2016
 137 *
 138 *  =====================================================================
 139       SUBROUTINE CHETRF_AASEN( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
 140 *
 141 *  -- LAPACK computational routine (version 3.4.0) --
 142 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 143 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 144 *     November 2016
 145 *
 146       IMPLICIT NONE
 147 *
 148 *     .. Scalar Arguments ..
 149       CHARACTER    UPLO
 150       INTEGER      N, LDA, LWORK, INFO
 151 *     ..
 152 *     .. Array Arguments ..
 153       INTEGER      IPIV( * )
 154       COMPLEX   A( LDA, * ), WORK( * )
 155 *     ..
 156 *
 157 *  =====================================================================
 158 *     .. Parameters ..
 159       COMPLEX   ZERO, ONE
 160       PARAMETER    ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) )
 161 *
 162 *     .. Local Scalars ..
 163       LOGICAL      LQUERY, UPPER
 164       INTEGER      J, LWKOPT, IINFO
 165       INTEGER      NB, MJ, NJ, K1, K2, J1, J2, J3, JB
 166       COMPLEX   ALPHA
 167 *     ..
 168 *     .. External Functions ..
 169       LOGICAL      LSAME
 170       INTEGER      ILAENV
 171       EXTERNAL     LSAME, ILAENV
 172 *     ..
 173 *     .. External Subroutines ..
 174       EXTERNAL     XERBLA
 175 *     ..
 176 *     .. Intrinsic Functions ..
 177       INTRINSIC    REAL, CONJG, MAX
 178 *     ..
 179 *     .. Executable Statements ..
 180 *
 181 *     Determine the block size
 182 *
 183       NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
 184 *
 185 *     Test the input parameters.
 186 *
 187       INFO = 0
 188       UPPER = LSAME( UPLO, 'U' )
 189       LQUERY = ( LWORK.EQ.-1 )
 190       IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
 191          INFO = -1
 192       ELSE IF( N.LT.0 ) THEN
 193          INFO = -2
 194       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
 195          INFO = -4
 196       ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN
 197          INFO = -7
 198       END IF
 199 *
 200       IF( INFO.EQ.0 ) THEN
 201          LWKOPT = (NB+1)*N
 202          WORK( 1 ) = LWKOPT
 203       END IF
 204 *
 205       IF( INFO.NE.0 ) THEN
 206          CALL XERBLA( 'CHETRF_AASEN', -INFO )
 207          RETURN
 208       ELSE IF( LQUERY ) THEN
 209          RETURN
 210       END IF
 211 *
 212 *     Quick return
 213 *
 214       IF ( N.EQ.0 ) THEN
 215           RETURN
 216       ENDIF
 217       IPIV( 1 ) = 1
 218       IF ( N.EQ.1 ) THEN
 219          A( 1, 1 ) = REAL( A( 1, 1 ) )
 220          IF ( A( 1, 1 ).EQ.ZERO ) THEN
 221             INFO = 1
 222          END IF
 223          RETURN
 224       END IF
 225 *
 226 *     Adjubst block size based on the workspace size
 227 *
 228       IF( LWORK.LT.((1+NB)*N) ) THEN
 229          NB = ( LWORK-N ) / N
 230       END IF
 231 *
 232       IF( UPPER ) THEN
 233 *
 234 *        .....................................................
 235 *        Factorize A as L*D*L**T using the upper triangle of A
 236 *        .....................................................
 237 *
 238 *        copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
 239 *
 240          CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
 241 *
 242 *        J is the main loop index, increasing from 1 to N in steps of
 243 *        JB, where JB is the number of columns factorized by CLAHEF;
 244 *        JB is either NB, or N-J+1 for the last block
 245 *
 246          J = 0
 247  10      CONTINUE
 248          IF( J.GE.N )
 249      $      GO TO 20
 250 *
 251 *        each step of the main loop
 252 *         J is the last column of the previous panel
 253 *         J1 is the first column of the current panel
 254 *         K1 identifies if the previous column of the panel has been
 255 *          explicitly stored, e.g., K1=1 for the first panel, and
 256 *          K1=0 for the rest
 257 *
 258          J1 = J + 1
 259          JB = MIN( N-J1+1, NB )
 260          K1 = MAX(1, J)-J
 261 *
 262 *        Panel factorization
 263 *
 264          CALL CLAHEF_AASEN( UPLO, 2-K1, N-J, JB,
 265      $                      A( MAX(1, J), J+1 ), LDA,
 266      $                      IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
 267      $                      IINFO )
 268          IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
 269              INFO = IINFO+J
 270          ENDIF
 271 *
 272 *        Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 273 *
 274          DO J2 = J+2, MIN(N, J+JB+1)
 275             IPIV( J2 ) = IPIV( J2 ) + J
 276             IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
 277                CALL CSWAP( J1-K1-2, A( 1, J2 ), 1,
 278      $                              A( 1, IPIV(J2) ), 1 )
 279             END IF
 280          END DO
 281          J = J + JB
 282 *
 283 *        Trailing submatrix update, where
 284 *         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
 285 *         WORK stores the current block of the auxiriarly matrix H
 286 *
 287          IF( J.LT.N ) THEN
 288 *
 289 *          if the first panel and JB=1 (NB=1), then nothing to do
 290 *
 291             IF( J1.GT.1 .OR. JB.GT.1 ) THEN
 292 *
 293 *              Merge rank-1 update with BLAS-3 update
 294 *
 295                ALPHA = CONJG( A( J, J+1 ) )
 296                A( J, J+1 ) = ONE
 297                CALL CCOPY( N-J, A( J-1, J+1 ), LDA,
 298      $                          WORK( (J+1-J1+1)+JB*N ), 1 )
 299                CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
 300 *
 301 *              K1 identifies if the previous column of the panel has been
 302 *               explicitly stored, e.g., K1=0 and K2=1 for the first panel,
 303 *               and K1=1 and K2=0 for the rest
 304 *
 305                IF( J1.GT.1 ) THEN
 306 *
 307 *                 Not first panel
 308 *
 309                   K2 = 1
 310                ELSE
 311 *
 312 *                 First panel
 313 *
 314                   K2 = 0
 315 *
 316 *                 First update skips the first column
 317 *
 318                   JB = JB - 1
 319                END IF
 320 *
 321                DO J2 = J+1, N, NB
 322                   NJ = MIN( NB, N-J2+1 )
 323 *
 324 *                 Update (J2, J2) diagonal block with CGEMV
 325 *
 326                   J3 = J2
 327                   DO MJ = NJ-1, 1, -1
 328                      CALL CGEMM( 'Conjugate transpose', 'Transpose',
 329      $                            1, MJ, JB+1,
 330      $                           -ONE, A( J1-K2, J3 ), LDA,
 331      $                                 WORK( (J3-J1+1)+K1*N ), N,
 332      $                            ONE, A( J3, J3 ), LDA )
 333                      J3 = J3 + 1
 334                   END DO
 335 *
 336 *                 Update off-diagonal block of J2-th block row with CGEMM
 337 *
 338                   CALL CGEMM( 'Conjugate transpose', 'Transpose',
 339      $                        NJ, N-J3+1, JB+1,
 340      $                       -ONE, A( J1-K2, J2 ), LDA,
 341      $                             WORK( (J3-J1+1)+K1*N ), N,
 342      $                        ONE, A( J2, J3 ), LDA )
 343                END DO
 344 *
 345 *              Recover T( J, J+1 )
 346 *
 347                A( J, J+1 ) = CONJG( ALPHA )
 348             END IF
 349 *
 350 *           WORK(J+1, 1) stores H(J+1, 1)
 351 *
 352             CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
 353          END IF
 354          GO TO 10
 355       ELSE
 356 *
 357 *        .....................................................
 358 *        Factorize A as L*D*L**T using the lower triangle of A
 359 *        .....................................................
 360 *
 361 *        copy first column A(1:N, 1) into H(1:N, 1)
 362 *         (stored in WORK(1:N))
 363 *
 364          CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
 365 *
 366 *        J is the main loop index, increasing from 1 to N in steps of
 367 *        JB, where JB is the number of columns factorized by CLAHEF;
 368 *        JB is either NB, or N-J+1 for the last block
 369 *
 370          J = 0
 371  11      CONTINUE
 372          IF( J.GE.N )
 373      $      GO TO 20
 374 *
 375 *        each step of the main loop
 376 *         J is the last column of the previous panel
 377 *         J1 is the first column of the current panel
 378 *         K1 identifies if the previous column of the panel has been
 379 *          explicitly stored, e.g., K1=1 for the first panel, and
 380 *          K1=0 for the rest
 381 *
 382          J1 = J+1
 383          JB = MIN( N-J1+1, NB )
 384          K1 = MAX(1, J)-J
 385 *
 386 *        Panel factorization
 387 *
 388          CALL CLAHEF_AASEN( UPLO, 2-K1, N-J, JB,
 389      $                      A( J+1, MAX(1, J) ), LDA,
 390      $                      IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO)
 391          IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
 392             INFO = IINFO+J
 393          ENDIF
 394 *
 395 *        Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 396 *
 397          DO J2 = J+2, MIN(N, J+JB+1)
 398             IPIV( J2 ) = IPIV( J2 ) + J
 399             IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
 400                CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA,
 401      $                              A( IPIV(J2), 1 ), LDA )
 402             END IF
 403          END DO
 404          J = J + JB
 405 *
 406 *        Trailing submatrix update, where
 407 *          A(J2+1, J1-1) stores L(J2+1, J1) and
 408 *          WORK(J2+1, 1) stores H(J2+1, 1)
 409 *
 410          IF( J.LT.N ) THEN
 411 *
 412 *          if the first panel and JB=1 (NB=1), then nothing to do
 413 *
 414             IF( J1.GT.1 .OR. JB.GT.1 ) THEN
 415 *
 416 *              Merge rank-1 update with BLAS-3 update
 417 *
 418                ALPHA = CONJG( A( J+1, J ) )
 419                A( J+1, J ) = ONE
 420                CALL CCOPY( N-J, A( J+1, J-1 ), 1,
 421      $                          WORK( (J+1-J1+1)+JB*N ), 1 )
 422                CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
 423 *
 424 *              K1 identifies if the previous column of the panel has been
 425 *               explicitly stored, e.g., K1=0 and K2=1 for the first panel,
 426 *               and K1=1 and K2=0 for the rest
 427 *
 428                IF( J1.GT.1 ) THEN
 429 *
 430 *                 Not first panel
 431 *
 432                   K2 = 1
 433                ELSE
 434 *
 435 *                 First panel
 436 *
 437                   K2 = 0
 438 *
 439 *                 First update skips the first column
 440 *
 441                   JB = JB - 1
 442                END IF
 443 *
 444                DO J2 = J+1, N, NB
 445                   NJ = MIN( NB, N-J2+1 )
 446 *
 447 *                 Update (J2, J2) diagonal block with CGEMV
 448 *
 449                   J3 = J2
 450                   DO MJ = NJ-1, 1, -1
 451                      CALL CGEMM( 'No transpose', 'Conjugate transpose',
 452      $                           MJ, 1, JB+1,
 453      $                          -ONE, WORK( (J3-J1+1)+K1*N ), N,
 454      $                                A( J3, J1-K2 ), LDA,
 455      $                           ONE, A( J3, J3 ), LDA )
 456                      J3 = J3 + 1
 457                   END DO
 458 *
 459 *                 Update off-diagonal block of J2-th block column with CGEMM
 460 *
 461                   CALL CGEMM( 'No transpose', 'Conjugate transpose',
 462      $                        N-J3+1, NJ, JB+1,
 463      $                       -ONE, WORK( (J3-J1+1)+K1*N ), N,
 464      $                             A( J2, J1-K2 ), LDA,
 465      $                        ONE, A( J3, J2 ), LDA )
 466                END DO
 467 *
 468 *              Recover T( J+1, J )
 469 *
 470                A( J+1, J ) = CONJG( ALPHA )
 471             END IF
 472 *
 473 *           WORK(J+1, 1) stores H(J+1, 1)
 474 *
 475             CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
 476          END IF
 477          GO TO 11
 478       END IF
 479 *
 480    20 CONTINUE
 481       RETURN
 482 *
 483 *     End of CHETRF_AASEN
 484 *
 485       END