From 1ac0f87e617d0d0164341892b9dea8a0dfb60582 Mon Sep 17 00:00:00 2001 From: Ichitaro Yamazaki Date: Thu, 17 Nov 2016 14:17:23 -0500 Subject: [PATCH] first version of complex symmetric solvers. --- SRC/clasyf_aa.f | 506 ++++++++++++++++++++++++++++++++++++++++++++++++ SRC/csysv_aa.f | 254 ++++++++++++++++++++++++ SRC/csytrf_aa.f | 480 +++++++++++++++++++++++++++++++++++++++++++++ SRC/csytrs_aa.f | 285 +++++++++++++++++++++++++++ SRC/zlasyf_aa.f | 506 ++++++++++++++++++++++++++++++++++++++++++++++++ SRC/zsysv_aa.f | 254 ++++++++++++++++++++++++ SRC/zsytrf_aa.f | 480 +++++++++++++++++++++++++++++++++++++++++++++ SRC/zsytrs_aa.f | 285 +++++++++++++++++++++++++++ 8 files changed, 3050 insertions(+) create mode 100644 SRC/clasyf_aa.f create mode 100644 SRC/csysv_aa.f create mode 100644 SRC/csytrf_aa.f create mode 100644 SRC/csytrs_aa.f create mode 100644 SRC/zlasyf_aa.f create mode 100644 SRC/zsysv_aa.f create mode 100644 SRC/zsytrf_aa.f create mode 100644 SRC/zsytrs_aa.f diff --git a/SRC/clasyf_aa.f b/SRC/clasyf_aa.f new file mode 100644 index 00000000..e5339d5e --- /dev/null +++ b/SRC/clasyf_aa.f @@ -0,0 +1,506 @@ +*> \brief \b CLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a complex symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diatonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by CSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace, dimension (M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2 + COMPLEX PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ILAENV + EXTERNAL LSAME, ILAENV, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from CSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 +* +* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:N, J) has been initialized to be A(J, J:N) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CGEMV( 'No transpose', M-J+1, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:n, i) into WORK +* + CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* + ALPHA = -A( K-1, J ) + CALL CAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL CAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* + CALL CSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) + IF( (A( K, J ).EQ.ZERO ) .AND. + $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN + IF(INFO .EQ. 0) THEN + INFO = J + ENDIF + END IF +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J:N, J), +* + CALL CCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL CSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL CLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + ELSE + IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN + INFO = J + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from CSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 +* +* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:N, J) has been initialized to be A(J:N, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CGEMV( 'No transpose', M-J+1, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:N, J) into WORK +* + CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL CAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L((J+1):N, J) +* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL CAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* + CALL CSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) + IF( (A( J, K ).EQ.ZERO) .AND. + $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN + IF (INFO .EQ. 0) + $ INFO = J + END IF +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J+1:N, J), +* + CALL CCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL CSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL CLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + ELSE + IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN + INFO = J + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of CLASYF_AA +* + END diff --git a/SRC/csysv_aa.f b/SRC/csysv_aa.f new file mode 100644 index 00000000..7c82a400 --- /dev/null +++ b/SRC/csysv_aa.f @@ -0,0 +1,254 @@ +*> \brief CSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(2*N, 3*N-2), and for +*> the best performance, LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for CSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexSYsolve +* +* ===================================================================== + SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYTRF, CSYTRS, CSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV_AA +* + END diff --git a/SRC/csytrf_aa.f b/SRC/csytrf_aa.f new file mode 100644 index 00000000..20aa5ba2 --- /dev/null +++ b/SRC/csytrf_aa.f @@ -0,0 +1,480 @@ +*> \brief \b CSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF_AA computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=2*N. For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT, IINFO + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + IF ( A( 1, 1 ).EQ.ZERO ) THEN + INFO = 1 + END IF + RETURN + END IF +* +* Adjubst block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), + $ IINFO ) + IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN + INFO = IINFO+J + ENDIF +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL CCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with CGEMM +* + CALL CGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) + IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN + INFO = IINFO+J + ENDIF +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL CCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with CGEMM +* + CALL CGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of CSYTRF_AA +* + END diff --git a/SRC/csytrs_aa.f b/SRC/csytrs_aa.f new file mode 100644 index 00000000..5f21078f --- /dev/null +++ b/SRC/csytrs_aa.f @@ -0,0 +1,285 @@ +*> \brief \b CSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS_AA solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Details of factors computed by CSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by CSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= 3*N-2. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGTSV, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A( 1, 1 ), LDA+1, WORK( N ), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1 ) + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL CGTSV( N, NRHS, WORK( 1 ), WORK( N ), WORK( 2*N ), B, LDB, + $ INFO ) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1 ) + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL CGTSV( N, NRHS, WORK( 1 ), WORK(N), WORK( 2*N ), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of CSYTRS_AA +* + END diff --git a/SRC/zlasyf_aa.f b/SRC/zlasyf_aa.f new file mode 100644 index 00000000..ee32fc4e --- /dev/null +++ b/SRC/zlasyf_aa.f @@ -0,0 +1,506 @@ +*> \brief \b ZLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a complex symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diatonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by ZSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace, dimension (M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2 + COMPLEX*16 PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX, ILAENV + EXTERNAL LSAME, ILAENV, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from ZSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 +* +* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:N, J) has been initialized to be A(J, J:N) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZGEMV( 'No transpose', M-J+1, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:n, i) into WORK +* + CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* + ALPHA = -A( K-1, J ) + CALL ZAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL ZAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* + CALL ZSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) + IF( (A( K, J ).EQ.ZERO ) .AND. + $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN + IF(INFO .EQ. 0) THEN + INFO = J + ENDIF + END IF +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J:N, J), +* + CALL ZCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL ZSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL ZLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + ELSE + IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN + INFO = J + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from ZSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 +* +* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:N, J) has been initialized to be A(J:N, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZGEMV( 'No transpose', M-J+1, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:N, J) into WORK +* + CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL ZAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L((J+1):N, J) +* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL ZAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* + CALL ZSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) + IF( (A( J, K ).EQ.ZERO) .AND. + $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN + IF (INFO .EQ. 0) + $ INFO = J + END IF +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J+1:N, J), +* + CALL ZCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL ZSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL ZLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + ELSE + IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN + INFO = J + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of ZLASYF_AA +* + END diff --git a/SRC/zsysv_aa.f b/SRC/zsysv_aa.f new file mode 100644 index 00000000..f1d1f76d --- /dev/null +++ b/SRC/zsysv_aa.f @@ -0,0 +1,254 @@ +*> \brief ZSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(2*N, 3*N-2), and for +*> the best performance, LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for ZSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16SYsolve +* +* ===================================================================== + SUBROUTINE ZSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF, ZSYTRS, ZSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV_AA +* + END diff --git a/SRC/zsytrf_aa.f b/SRC/zsytrf_aa.f new file mode 100644 index 00000000..4e9851a3 --- /dev/null +++ b/SRC/zsytrf_aa.f @@ -0,0 +1,480 @@ +*> \brief \b ZSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF_AA computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=2*N. For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT, IINFO + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX*16 ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + IF ( A( 1, 1 ).EQ.ZERO ) THEN + INFO = 1 + END IF + RETURN + END IF +* +* Adjubst block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), + $ IINFO ) + IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN + INFO = IINFO+J + ENDIF +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL ZCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with ZGEMM +* + CALL ZGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) + IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN + INFO = IINFO+J + ENDIF +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL ZCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with ZGEMM +* + CALL ZGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of ZSYTRF_AA +* + END diff --git a/SRC/zsytrs_aa.f b/SRC/zsytrs_aa.f new file mode 100644 index 00000000..b9a2ab83 --- /dev/null +++ b/SRC/zsytrs_aa.f @@ -0,0 +1,285 @@ +*> \brief \b ZSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS_AA solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Details of factors computed by ZSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by ZSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= 3*N-2. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGTSV, ZSWAP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A( 1, 1 ), LDA+1, WORK( N ), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1 ) + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL ZGTSV( N, NRHS, WORK( 1 ), WORK( N ), WORK( 2*N ), B, LDB, + $ INFO ) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1 ) + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL ZGTSV( N, NRHS, WORK( 1 ), WORK(N), WORK( 2*N ), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of ZSYTRS_AA +* + END -- 2.34.1