3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download CGBTRF + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgbtrf.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgbtrf.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgbtrf.f">
21 * SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
23 * .. Scalar Arguments ..
24 * INTEGER INFO, KL, KU, LDAB, M, N
26 * .. Array Arguments ..
28 * COMPLEX AB( LDAB, * )
37 *> CGBTRF computes an LU factorization of a complex m-by-n band matrix A
38 *> using partial pivoting with row interchanges.
40 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
49 *> The number of rows of the matrix A. M >= 0.
55 *> The number of columns of the matrix A. N >= 0.
61 *> The number of subdiagonals within the band of A. KL >= 0.
67 *> The number of superdiagonals within the band of A. KU >= 0.
72 *> AB is COMPLEX array, dimension (LDAB,N)
73 *> On entry, the matrix A in band storage, in rows KL+1 to
74 *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
75 *> The j-th column of A is stored in the j-th column of the
76 *> array AB as follows:
77 *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
79 *> On exit, details of the factorization: U is stored as an
80 *> upper triangular band matrix with KL+KU superdiagonals in
81 *> rows 1 to KL+KU+1, and the multipliers used during the
82 *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
83 *> See below for further details.
89 *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
94 *> IPIV is INTEGER array, dimension (min(M,N))
95 *> The pivot indices; for 1 <= i <= min(M,N), row i of the
96 *> matrix was interchanged with row IPIV(i).
102 *> = 0: successful exit
103 *> < 0: if INFO = -i, the i-th argument had an illegal value
104 *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
105 *> has been completed, but the factor U is exactly
106 *> singular, and division by zero will occur if it is used
107 *> to solve a system of equations.
113 *> \author Univ. of Tennessee
114 *> \author Univ. of California Berkeley
115 *> \author Univ. of Colorado Denver
118 *> \date November 2011
120 *> \ingroup complexGBcomputational
122 *> \par Further Details:
123 * =====================
127 *> The band storage scheme is illustrated by the following example, when
128 *> M = N = 6, KL = 2, KU = 1:
130 *> On entry: On exit:
132 *> * * * + + + * * * u14 u25 u36
133 *> * * + + + + * * u13 u24 u35 u46
134 *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
135 *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
136 *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
137 *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
139 *> Array elements marked * are not used by the routine; elements marked
140 *> + need not be set on entry, but are required by the routine to store
141 *> elements of U because of fill-in resulting from the row interchanges.
144 * =====================================================================
145 SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
147 * -- LAPACK computational routine (version 3.4.0) --
148 * -- LAPACK is a software package provided by Univ. of Tennessee, --
149 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
152 * .. Scalar Arguments ..
153 INTEGER INFO, KL, KU, LDAB, M, N
155 * .. Array Arguments ..
157 COMPLEX AB( LDAB, * )
160 * =====================================================================
164 PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
165 $ ZERO = ( 0.0E+0, 0.0E+0 ) )
166 INTEGER NBMAX, LDWORK
167 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
169 * .. Local Scalars ..
170 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
171 $ JU, K2, KM, KV, NB, NW
175 COMPLEX WORK13( LDWORK, NBMAX ),
176 $ WORK31( LDWORK, NBMAX )
178 * .. External Functions ..
179 INTEGER ICAMAX, ILAENV
180 EXTERNAL ICAMAX, ILAENV
182 * .. External Subroutines ..
183 EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
184 $ CSWAP, CTRSM, XERBLA
186 * .. Intrinsic Functions ..
189 * .. Executable Statements ..
191 * KV is the number of superdiagonals in the factor U, allowing for
196 * Test the input parameters.
201 ELSE IF( N.LT.0 ) THEN
203 ELSE IF( KL.LT.0 ) THEN
205 ELSE IF( KU.LT.0 ) THEN
207 ELSE IF( LDAB.LT.KL+KV+1 ) THEN
211 CALL XERBLA( 'CGBTRF', -INFO )
215 * Quick return if possible
217 IF( M.EQ.0 .OR. N.EQ.0 )
220 * Determine the block size for this environment
222 NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
224 * The block size must not exceed the limit set by the size of the
225 * local arrays WORK13 and WORK31.
227 NB = MIN( NB, NBMAX )
229 IF( NB.LE.1 .OR. NB.GT.KL ) THEN
233 CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
238 * Zero the superdiagonal elements of the work array WORK13
242 WORK13( I, J ) = ZERO
246 * Zero the subdiagonal elements of the work array WORK31
250 WORK31( I, J ) = ZERO
254 * Gaussian elimination with partial pivoting
256 * Set fill-in elements in columns KU+2 to KV to zero
258 DO 60 J = KU + 2, MIN( KV, N )
259 DO 50 I = KV - J + 2, KL
264 * JU is the index of the last column affected by the current
265 * stage of the factorization
269 DO 180 J = 1, MIN( M, N ), NB
270 JB = MIN( NB, MIN( M, N )-J+1 )
272 * The active part of the matrix is partitioned
278 * Here A11, A21 and A31 denote the current block of JB columns
279 * which is about to be factorized. The number of rows in the
280 * partitioning are JB, I2, I3 respectively, and the numbers
281 * of columns are JB, J2, J3. The superdiagonal elements of A13
282 * and the subdiagonal elements of A31 lie outside the band.
284 I2 = MIN( KL-JB, M-J-JB+1 )
285 I3 = MIN( JB, M-J-KL+1 )
287 * J2 and J3 are computed after JU has been updated.
289 * Factorize the current block of JB columns
291 DO 80 JJ = J, J + JB - 1
293 * Set fill-in elements in column JJ+KV to zero
295 IF( JJ+KV.LE.N ) THEN
297 AB( I, JJ+KV ) = ZERO
301 * Find pivot and test for singularity. KM is the number of
302 * subdiagonal elements in the current column.
305 JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
306 IPIV( JJ ) = JP + JJ - J
307 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
308 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
311 * Apply interchange to columns J to J+JB-1
313 IF( JP+JJ-1.LT.J+KL ) THEN
315 CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
316 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
319 * The interchange affects columns J to JJ-1 of A31
320 * which are stored in the work array WORK31
322 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
323 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
324 CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
325 $ AB( KV+JP, JJ ), LDAB-1 )
329 * Compute multipliers
331 CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
334 * Update trailing submatrix within the band and within
335 * the current block. JM is the index of the last column
336 * which needs to be updated.
338 JM = MIN( JU, J+JB-1 )
340 $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
341 $ AB( KV, JJ+1 ), LDAB-1,
342 $ AB( KV+1, JJ+1 ), LDAB-1 )
345 * If pivot is zero, set INFO to the index of the pivot
346 * unless a zero pivot has already been found.
352 * Copy current column of A31 into the work array WORK31
354 NW = MIN( JJ-J+1, I3 )
356 $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
357 $ WORK31( 1, JJ-J+1 ), 1 )
361 * Apply the row interchanges to the other blocks.
363 J2 = MIN( JU-J+1, KV ) - JB
364 J3 = MAX( 0, JU-J-KV+1 )
366 * Use CLASWP to apply the row interchanges to A12, A22, and
369 CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
372 * Adjust the pivot indices.
374 DO 90 I = J, J + JB - 1
375 IPIV( I ) = IPIV( I ) + J - 1
378 * Apply the row interchanges to A13, A23, and A33
384 DO 100 II = J + I - 1, J + JB - 1
387 TEMP = AB( KV+1+II-JJ, JJ )
388 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
389 AB( KV+1+IP-JJ, JJ ) = TEMP
394 * Update the relevant part of the trailing submatrix
400 CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
401 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
402 $ AB( KV+1-JB, J+JB ), LDAB-1 )
408 CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
409 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
410 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
411 $ AB( KV+1, J+JB ), LDAB-1 )
418 CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
419 $ JB, -ONE, WORK31, LDWORK,
420 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
421 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
427 * Copy the lower triangle of A13 into the work array
432 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
436 * Update A13 in the work array
438 CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
439 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
446 CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
447 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
448 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
456 CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
457 $ JB, -ONE, WORK31, LDWORK, WORK13,
458 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
461 * Copy the lower triangle of A13 back into place
465 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
471 * Adjust the pivot indices.
473 DO 160 I = J, J + JB - 1
474 IPIV( I ) = IPIV( I ) + J - 1
478 * Partially undo the interchanges in the current block to
479 * restore the upper triangular form of A31 and copy the upper
480 * triangle of A31 back into place
482 DO 170 JJ = J + JB - 1, J, -1
483 JP = IPIV( JJ ) - JJ + 1
486 * Apply interchange to columns J to JJ-1
488 IF( JP+JJ-1.LT.J+KL ) THEN
490 * The interchange does not affect A31
492 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
493 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
496 * The interchange does affect A31
498 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
499 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
503 * Copy the current column of A31 back into place
505 NW = MIN( I3, JJ-J+1 )
507 $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
508 $ AB( KV+KL+1-JJ+J, JJ ), 1 )