1 *> \brief \b CHETRF_AASEN
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download CHETRF_AASEN + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aasen.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aasen.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aasen.f">
21 * SUBROUTINE CHETRF_AASEN( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
23 * .. Scalar Arguments ..
25 * INTEGER N, LDA, LWORK, INFO
27 * .. Array Arguments ..
29 * COMPLEX A( LDA, * ), WORK( * )
37 *> CHETRF_AASEN computes the factorization of a real hermitian matrix A
38 *> using the Aasen's algorithm. The form of the factorization is
40 *> A = U*T*U**T or A = L*T*L**T
42 *> where U (or L) is a product of permutation and unit upper (lower)
43 *> triangular matrices, and T is a hermitian tridiagonal matrix.
45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
61 *> The order of the matrix A. N >= 0.
66 *> A is COMPLEX array, dimension (LDA,N)
67 *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
68 *> N-by-N upper triangular part of A contains the upper
69 *> triangular part of the matrix A, and the strictly lower
70 *> triangular part of A is not referenced. If UPLO = 'L', the
71 *> leading N-by-N lower triangular part of A contains the lower
72 *> triangular part of the matrix A, and the strictly upper
73 *> triangular part of A is not referenced.
75 *> On exit, the tridiagonal matrix is stored in the diagonals
76 *> and the subdiagonals of A just below (or above) the diagonals,
77 *> and L is stored below (or above) the subdiaonals, when UPLO
84 *> The leading dimension of the array A. LDA >= max(1,N).
89 *> IPIV is INTEGER array, dimension (N)
90 *> On exit, it contains the details of the interchanges, i.e.,
91 *> the row and column k of A were interchanged with the
92 *> row and column IPIV(k).
97 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
98 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
104 *> The length of WORK. LWORK >= 2*N. For optimum performance
105 *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
107 *> If LWORK = -1, then a workspace query is assumed; the routine
108 *> only calculates the optimal size of the WORK array, returns
109 *> this value as the first entry of the WORK array, and no error
110 *> message related to LWORK is issued by XERBLA.
116 *> = 0: successful exit
117 *> < 0: if INFO = -i, the i-th argument had an illegal value
118 *> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
119 *> has been completed, but the block diagonal matrix D is
120 *> exactly singular, and division by zero will occur if it
121 *> is used to solve a system of equations.
127 *> \author Univ. of Tennessee
128 *> \author Univ. of California Berkeley
129 *> \author Univ. of Colorado Denver
132 *> \date November 2016
134 *> \ingroup complexSYcomputational
136 * @generated from zhetrf_aasen.f, fortran z -> c, Sun Oct 2 22:29:10 2016
138 * =====================================================================
139 SUBROUTINE CHETRF_AASEN( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
141 * -- LAPACK computational routine (version 3.4.0) --
142 * -- LAPACK is a software package provided by Univ. of Tennessee, --
143 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
148 * .. Scalar Arguments ..
150 INTEGER N, LDA, LWORK, INFO
152 * .. Array Arguments ..
154 COMPLEX A( LDA, * ), WORK( * )
157 * =====================================================================
160 PARAMETER ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) )
162 * .. Local Scalars ..
163 LOGICAL LQUERY, UPPER
164 INTEGER J, LWKOPT, IINFO
165 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
168 * .. External Functions ..
171 EXTERNAL LSAME, ILAENV
173 * .. External Subroutines ..
176 * .. Intrinsic Functions ..
177 INTRINSIC REAL, CONJG, MAX
179 * .. Executable Statements ..
181 * Determine the block size
183 NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
185 * Test the input parameters.
188 UPPER = LSAME( UPLO, 'U' )
189 LQUERY = ( LWORK.EQ.-1 )
190 IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
192 ELSE IF( N.LT.0 ) THEN
194 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
196 ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN
206 CALL XERBLA( 'CHETRF_AASEN', -INFO )
208 ELSE IF( LQUERY ) THEN
219 A( 1, 1 ) = REAL( A( 1, 1 ) )
220 IF ( A( 1, 1 ).EQ.ZERO ) THEN
226 * Adjubst block size based on the workspace size
228 IF( LWORK.LT.((1+NB)*N) ) THEN
234 * .....................................................
235 * Factorize A as L*D*L**T using the upper triangle of A
236 * .....................................................
238 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
240 CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
242 * J is the main loop index, increasing from 1 to N in steps of
243 * JB, where JB is the number of columns factorized by CLAHEF;
244 * JB is either NB, or N-J+1 for the last block
251 * each step of the main loop
252 * J is the last column of the previous panel
253 * J1 is the first column of the current panel
254 * K1 identifies if the previous column of the panel has been
255 * explicitly stored, e.g., K1=1 for the first panel, and
259 JB = MIN( N-J1+1, NB )
262 * Panel factorization
264 CALL CLAHEF_AASEN( UPLO, 2-K1, N-J, JB,
265 $ A( MAX(1, J), J+1 ), LDA,
266 $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
268 IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
272 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
274 DO J2 = J+2, MIN(N, J+JB+1)
275 IPIV( J2 ) = IPIV( J2 ) + J
276 IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
277 CALL CSWAP( J1-K1-2, A( 1, J2 ), 1,
278 $ A( 1, IPIV(J2) ), 1 )
283 * Trailing submatrix update, where
284 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
285 * WORK stores the current block of the auxiriarly matrix H
289 * if the first panel and JB=1 (NB=1), then nothing to do
291 IF( J1.GT.1 .OR. JB.GT.1 ) THEN
293 * Merge rank-1 update with BLAS-3 update
295 ALPHA = CONJG( A( J, J+1 ) )
297 CALL CCOPY( N-J, A( J-1, J+1 ), LDA,
298 $ WORK( (J+1-J1+1)+JB*N ), 1 )
299 CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
301 * K1 identifies if the previous column of the panel has been
302 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
303 * and K1=1 and K2=0 for the rest
316 * First update skips the first column
322 NJ = MIN( NB, N-J2+1 )
324 * Update (J2, J2) diagonal block with CGEMV
328 CALL CGEMM( 'Conjugate transpose', 'Transpose',
330 $ -ONE, A( J1-K2, J3 ), LDA,
331 $ WORK( (J3-J1+1)+K1*N ), N,
332 $ ONE, A( J3, J3 ), LDA )
336 * Update off-diagonal block of J2-th block row with CGEMM
338 CALL CGEMM( 'Conjugate transpose', 'Transpose',
340 $ -ONE, A( J1-K2, J2 ), LDA,
341 $ WORK( (J3-J1+1)+K1*N ), N,
342 $ ONE, A( J2, J3 ), LDA )
345 * Recover T( J, J+1 )
347 A( J, J+1 ) = CONJG( ALPHA )
350 * WORK(J+1, 1) stores H(J+1, 1)
352 CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
357 * .....................................................
358 * Factorize A as L*D*L**T using the lower triangle of A
359 * .....................................................
361 * copy first column A(1:N, 1) into H(1:N, 1)
362 * (stored in WORK(1:N))
364 CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
366 * J is the main loop index, increasing from 1 to N in steps of
367 * JB, where JB is the number of columns factorized by CLAHEF;
368 * JB is either NB, or N-J+1 for the last block
375 * each step of the main loop
376 * J is the last column of the previous panel
377 * J1 is the first column of the current panel
378 * K1 identifies if the previous column of the panel has been
379 * explicitly stored, e.g., K1=1 for the first panel, and
383 JB = MIN( N-J1+1, NB )
386 * Panel factorization
388 CALL CLAHEF_AASEN( UPLO, 2-K1, N-J, JB,
389 $ A( J+1, MAX(1, J) ), LDA,
390 $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO)
391 IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
395 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
397 DO J2 = J+2, MIN(N, J+JB+1)
398 IPIV( J2 ) = IPIV( J2 ) + J
399 IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
400 CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA,
401 $ A( IPIV(J2), 1 ), LDA )
406 * Trailing submatrix update, where
407 * A(J2+1, J1-1) stores L(J2+1, J1) and
408 * WORK(J2+1, 1) stores H(J2+1, 1)
412 * if the first panel and JB=1 (NB=1), then nothing to do
414 IF( J1.GT.1 .OR. JB.GT.1 ) THEN
416 * Merge rank-1 update with BLAS-3 update
418 ALPHA = CONJG( A( J+1, J ) )
420 CALL CCOPY( N-J, A( J+1, J-1 ), 1,
421 $ WORK( (J+1-J1+1)+JB*N ), 1 )
422 CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
424 * K1 identifies if the previous column of the panel has been
425 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
426 * and K1=1 and K2=0 for the rest
439 * First update skips the first column
445 NJ = MIN( NB, N-J2+1 )
447 * Update (J2, J2) diagonal block with CGEMV
451 CALL CGEMM( 'No transpose', 'Conjugate transpose',
453 $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
454 $ A( J3, J1-K2 ), LDA,
455 $ ONE, A( J3, J3 ), LDA )
459 * Update off-diagonal block of J2-th block column with CGEMM
461 CALL CGEMM( 'No transpose', 'Conjugate transpose',
463 $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
464 $ A( J2, J1-K2 ), LDA,
465 $ ONE, A( J3, J2 ), LDA )
468 * Recover T( J+1, J )
470 A( J+1, J ) = CONJG( ALPHA )
473 * WORK(J+1, 1) stores H(J+1, 1)
475 CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
483 * End of CHETRF_AASEN