1 *> \brief \b DSYTRD_2STAGE
3 * @generated from zhetrd_2stage.f, fortran z -> d, Sun Nov 6 19:34:06 2016
5 * =========== DOCUMENTATION ===========
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
11 *> Download DSYTRD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd_2stage.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd_2stage.f">
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd_2stage.f">
23 * SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU,
24 * HOUS2, LHOUS2, WORK, LWORK, INFO )
28 * .. Scalar Arguments ..
29 * CHARACTER VECT, UPLO
30 * INTEGER N, LDA, LWORK, LHOUS2, INFO
32 * .. Array Arguments ..
33 * DOUBLE PRECISION D( * ), E( * )
34 * DOUBLE PRECISION A( LDA, * ), TAU( * ),
35 * HOUS2( * ), WORK( * )
44 *> DSYTRD_2STAGE reduces a real symmetric matrix A to real symmetric
45 *> tridiagonal form T by a orthogonal similarity transformation:
46 *> Q1**T Q2**T* A * Q2 * Q1 = T.
54 *> VECT is CHARACTER*1
55 *> = 'N': No need for the Housholder representation,
56 *> in particular for the second stage (Band to
57 *> tridiagonal) and thus LHOUS2 is of size max(1, 4*N);
58 *> = 'V': the Householder representation is needed to
59 *> either generate Q1 Q2 or to apply Q1 Q2,
60 *> then LHOUS2 is to be queried and computed.
61 *> (NOT AVAILABLE IN THIS RELEASE).
66 *> UPLO is CHARACTER*1
67 *> = 'U': Upper triangle of A is stored;
68 *> = 'L': Lower triangle of A is stored.
74 *> The order of the matrix A. N >= 0.
79 *> A is DOUBLE PRECISION array, dimension (LDA,N)
80 *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
81 *> N-by-N upper triangular part of A contains the upper
82 *> triangular part of the matrix A, and the strictly lower
83 *> triangular part of A is not referenced. If UPLO = 'L', the
84 *> leading N-by-N lower triangular part of A contains the lower
85 *> triangular part of the matrix A, and the strictly upper
86 *> triangular part of A is not referenced.
87 *> On exit, if UPLO = 'U', the band superdiagonal
88 *> of A are overwritten by the corresponding elements of the
89 *> internal band-diagonal matrix AB, and the elements above
90 *> the KD superdiagonal, with the array TAU, represent the orthogonal
91 *> matrix Q1 as a product of elementary reflectors; if UPLO
92 *> = 'L', the diagonal and band subdiagonal of A are over-
93 *> written by the corresponding elements of the internal band-diagonal
94 *> matrix AB, and the elements below the KD subdiagonal, with
95 *> the array TAU, represent the orthogonal matrix Q1 as a product
96 *> of elementary reflectors. See Further Details.
102 *> The leading dimension of the array A. LDA >= max(1,N).
107 *> D is DOUBLE PRECISION array, dimension (N)
108 *> The diagonal elements of the tridiagonal matrix T.
113 *> E is DOUBLE PRECISION array, dimension (N-1)
114 *> The off-diagonal elements of the tridiagonal matrix T.
119 *> TAU is DOUBLE PRECISION array, dimension (N-KD)
120 *> The scalar factors of the elementary reflectors of
121 *> the first stage (see Further Details).
126 *> HOUS2 is DOUBLE PRECISION array, dimension LHOUS2, that
127 *> store the Householder representation of the stage2
128 *> band to tridiagonal.
134 *> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension)
135 *> If LWORK = -1, or LHOUS2=-1,
136 *> then a query is assumed; the routine
137 *> only calculates the optimal size of the HOUS2 array, returns
138 *> this value as the first entry of the HOUS2 array, and no error
139 *> message related to LHOUS2 is issued by XERBLA.
140 *> LHOUS2 = MAX(1, dimension) where
141 *> dimension = 4*N if VECT='N'
142 *> not available now if VECT='H'
147 *> WORK is DOUBLE PRECISION array, dimension LWORK.
153 *> The dimension of the array WORK. LWORK = MAX(1, dimension)
154 *> If LWORK = -1, or LHOUS2=-1,
155 *> then a workspace query is assumed; the routine
156 *> only calculates the optimal size of the WORK array, returns
157 *> this value as the first entry of the WORK array, and no error
158 *> message related to LWORK is issued by XERBLA.
159 *> LWORK = MAX(1, dimension) where
160 *> dimension = max(stage1,stage2) + (KD+1)*N
161 *> = N*KD + N*max(KD+1,FACTOPTNB)
162 *> + max(2*KD*KD, KD*NTHREADS)
164 *> where KD is the blocking size of the reduction,
165 *> FACTOPTNB is the blocking used by the QR or LQ
166 *> algorithm, usually FACTOPTNB=128 is a good choice
167 *> NTHREADS is the number of threads used when
168 *> openMP compilation is enabled, otherwise =1.
174 *> = 0: successful exit
175 *> < 0: if INFO = -i, the i-th argument had an illegal value
181 *> \author Univ. of Tennessee
182 *> \author Univ. of California Berkeley
183 *> \author Univ. of Colorado Denver
186 *> \date November 2016
188 *> \ingroup doubleSYcomputational
190 *> \par Further Details:
191 * =====================
195 *> Implemented by Azzam Haidar.
197 *> All details are available on technical report, SC11, SC13 papers.
199 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
200 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
201 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
202 *> of 2011 International Conference for High Performance Computing,
203 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
204 *> Article 8 , 11 pages.
205 *> http://doi.acm.org/10.1145/2063384.2063394
207 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
208 *> An improved parallel singular value algorithm and its implementation
209 *> for multicore hardware, In Proceedings of 2013 International Conference
210 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
211 *> Denver, Colorado, USA, 2013.
212 *> Article 90, 12 pages.
213 *> http://doi.acm.org/10.1145/2503210.2503292
215 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
216 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
217 *> calculations based on fine-grained memory aware tasks.
218 *> International Journal of High Performance Computing Applications.
219 *> Volume 28 Issue 2, Pages 196-209, May 2014.
220 *> http://hpc.sagepub.com/content/28/2/196
224 * =====================================================================
225 SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU,
226 $ HOUS2, LHOUS2, WORK, LWORK, INFO )
230 * -- LAPACK computational routine (version 3.4.0) --
231 * -- LAPACK is a software package provided by Univ. of Tennessee, --
232 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
235 * .. Scalar Arguments ..
237 INTEGER N, LDA, LWORK, LHOUS2, INFO
239 * .. Array Arguments ..
240 DOUBLE PRECISION D( * ), E( * )
241 DOUBLE PRECISION A( LDA, * ), TAU( * ),
242 $ HOUS2( * ), WORK( * )
245 * =====================================================================
247 * .. Local Scalars ..
248 LOGICAL LQUERY, UPPER, WANTQ
249 INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS
251 * .. External Subroutines ..
252 EXTERNAL XERBLA, DSYTRD_SY2SB, DSYTRD_SB2ST
254 * .. External Functions ..
257 EXTERNAL LSAME, ILAENV
259 * .. Executable Statements ..
261 * Test the input parameters
264 WANTQ = LSAME( VECT, 'V' )
265 UPPER = LSAME( UPLO, 'U' )
266 LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 )
268 * Determine the block size, the workspace size and the hous size.
270 KD = ILAENV( 17, 'DSYTRD_2STAGE', VECT, N, -1, -1, -1 )
271 IB = ILAENV( 18, 'DSYTRD_2STAGE', VECT, N, KD, -1, -1 )
272 LHMIN = ILAENV( 19, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 )
273 LWMIN = ILAENV( 20, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 )
274 * WRITE(*,*),'DSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO,
277 IF( .NOT.LSAME( VECT, 'N' ) ) THEN
279 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
281 ELSE IF( N.LT.0 ) THEN
283 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
285 ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN
287 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
297 CALL XERBLA( 'DSYTRD_2STAGE', -INFO )
299 ELSE IF( LQUERY ) THEN
303 * Quick return if possible
310 * Determine pointer position
315 WPOS = ABPOS + LDAB*N
316 CALL DSYTRD_SY2SB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB,
317 $ TAU, WORK( WPOS ), LWRK, INFO )
319 CALL XERBLA( 'DSYTRD_SY2SB', -INFO )
322 CALL DSYTRD_SB2ST( 'Y', VECT, UPLO, N, KD,
323 $ WORK( ABPOS ), LDAB, D, E,
324 $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO )
326 CALL XERBLA( 'DSYTRD_SB2ST', -INFO )
335 * End of DSYTRD_2STAGE