3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download SGBTRF + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgbtrf.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgbtrf.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgbtrf.f">
21 * SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
23 * .. Scalar Arguments ..
24 * INTEGER INFO, KL, KU, LDAB, M, N
26 * .. Array Arguments ..
37 *> SGBTRF computes an LU factorization of a real m-by-n band matrix A
38 *> using partial pivoting with row interchanges.
40 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
49 *> The number of rows of the matrix A. M >= 0.
55 *> The number of columns of the matrix A. N >= 0.
61 *> The number of subdiagonals within the band of A. KL >= 0.
67 *> The number of superdiagonals within the band of A. KU >= 0.
72 *> AB is REAL array, dimension (LDAB,N)
73 *> On entry, the matrix A in band storage, in rows KL+1 to
74 *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
75 *> The j-th column of A is stored in the j-th column of the
76 *> array AB as follows:
77 *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
79 *> On exit, details of the factorization: U is stored as an
80 *> upper triangular band matrix with KL+KU superdiagonals in
81 *> rows 1 to KL+KU+1, and the multipliers used during the
82 *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
83 *> See below for further details.
89 *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
94 *> IPIV is INTEGER array, dimension (min(M,N))
95 *> The pivot indices; for 1 <= i <= min(M,N), row i of the
96 *> matrix was interchanged with row IPIV(i).
102 *> = 0: successful exit
103 *> < 0: if INFO = -i, the i-th argument had an illegal value
104 *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
105 *> has been completed, but the factor U is exactly
106 *> singular, and division by zero will occur if it is used
107 *> to solve a system of equations.
113 *> \author Univ. of Tennessee
114 *> \author Univ. of California Berkeley
115 *> \author Univ. of Colorado Denver
118 *> \date November 2011
120 *> \ingroup realGBcomputational
122 *> \par Further Details:
123 * =====================
127 *> The band storage scheme is illustrated by the following example, when
128 *> M = N = 6, KL = 2, KU = 1:
130 *> On entry: On exit:
132 *> * * * + + + * * * u14 u25 u36
133 *> * * + + + + * * u13 u24 u35 u46
134 *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
135 *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
136 *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
137 *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
139 *> Array elements marked * are not used by the routine; elements marked
140 *> + need not be set on entry, but are required by the routine to store
141 *> elements of U because of fill-in resulting from the row interchanges.
144 * =====================================================================
145 SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
147 * -- LAPACK computational routine (version 3.4.0) --
148 * -- LAPACK is a software package provided by Univ. of Tennessee, --
149 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
152 * .. Scalar Arguments ..
153 INTEGER INFO, KL, KU, LDAB, M, N
155 * .. Array Arguments ..
160 * =====================================================================
164 PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
165 INTEGER NBMAX, LDWORK
166 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
168 * .. Local Scalars ..
169 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
170 $ JU, K2, KM, KV, NB, NW
174 REAL WORK13( LDWORK, NBMAX ),
175 $ WORK31( LDWORK, NBMAX )
177 * .. External Functions ..
178 INTEGER ILAENV, ISAMAX
179 EXTERNAL ILAENV, ISAMAX
181 * .. External Subroutines ..
182 EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL,
183 $ SSWAP, STRSM, XERBLA
185 * .. Intrinsic Functions ..
188 * .. Executable Statements ..
190 * KV is the number of superdiagonals in the factor U, allowing for
195 * Test the input parameters.
200 ELSE IF( N.LT.0 ) THEN
202 ELSE IF( KL.LT.0 ) THEN
204 ELSE IF( KU.LT.0 ) THEN
206 ELSE IF( LDAB.LT.KL+KV+1 ) THEN
210 CALL XERBLA( 'SGBTRF', -INFO )
214 * Quick return if possible
216 IF( M.EQ.0 .OR. N.EQ.0 )
219 * Determine the block size for this environment
221 NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU )
223 * The block size must not exceed the limit set by the size of the
224 * local arrays WORK13 and WORK31.
226 NB = MIN( NB, NBMAX )
228 IF( NB.LE.1 .OR. NB.GT.KL ) THEN
232 CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
237 * Zero the superdiagonal elements of the work array WORK13
241 WORK13( I, J ) = ZERO
245 * Zero the subdiagonal elements of the work array WORK31
249 WORK31( I, J ) = ZERO
253 * Gaussian elimination with partial pivoting
255 * Set fill-in elements in columns KU+2 to KV to zero
257 DO 60 J = KU + 2, MIN( KV, N )
258 DO 50 I = KV - J + 2, KL
263 * JU is the index of the last column affected by the current
264 * stage of the factorization
268 DO 180 J = 1, MIN( M, N ), NB
269 JB = MIN( NB, MIN( M, N )-J+1 )
271 * The active part of the matrix is partitioned
277 * Here A11, A21 and A31 denote the current block of JB columns
278 * which is about to be factorized. The number of rows in the
279 * partitioning are JB, I2, I3 respectively, and the numbers
280 * of columns are JB, J2, J3. The superdiagonal elements of A13
281 * and the subdiagonal elements of A31 lie outside the band.
283 I2 = MIN( KL-JB, M-J-JB+1 )
284 I3 = MIN( JB, M-J-KL+1 )
286 * J2 and J3 are computed after JU has been updated.
288 * Factorize the current block of JB columns
290 DO 80 JJ = J, J + JB - 1
292 * Set fill-in elements in column JJ+KV to zero
294 IF( JJ+KV.LE.N ) THEN
296 AB( I, JJ+KV ) = ZERO
300 * Find pivot and test for singularity. KM is the number of
301 * subdiagonal elements in the current column.
304 JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 )
305 IPIV( JJ ) = JP + JJ - J
306 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
307 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
310 * Apply interchange to columns J to J+JB-1
312 IF( JP+JJ-1.LT.J+KL ) THEN
314 CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
315 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
318 * The interchange affects columns J to JJ-1 of A31
319 * which are stored in the work array WORK31
321 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
322 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
323 CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
324 $ AB( KV+JP, JJ ), LDAB-1 )
328 * Compute multipliers
330 CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
333 * Update trailing submatrix within the band and within
334 * the current block. JM is the index of the last column
335 * which needs to be updated.
337 JM = MIN( JU, J+JB-1 )
339 $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
340 $ AB( KV, JJ+1 ), LDAB-1,
341 $ AB( KV+1, JJ+1 ), LDAB-1 )
344 * If pivot is zero, set INFO to the index of the pivot
345 * unless a zero pivot has already been found.
351 * Copy current column of A31 into the work array WORK31
353 NW = MIN( JJ-J+1, I3 )
355 $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
356 $ WORK31( 1, JJ-J+1 ), 1 )
360 * Apply the row interchanges to the other blocks.
362 J2 = MIN( JU-J+1, KV ) - JB
363 J3 = MAX( 0, JU-J-KV+1 )
365 * Use SLASWP to apply the row interchanges to A12, A22, and
368 CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
371 * Adjust the pivot indices.
373 DO 90 I = J, J + JB - 1
374 IPIV( I ) = IPIV( I ) + J - 1
377 * Apply the row interchanges to A13, A23, and A33
383 DO 100 II = J + I - 1, J + JB - 1
386 TEMP = AB( KV+1+II-JJ, JJ )
387 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
388 AB( KV+1+IP-JJ, JJ ) = TEMP
393 * Update the relevant part of the trailing submatrix
399 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
400 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
401 $ AB( KV+1-JB, J+JB ), LDAB-1 )
407 CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
408 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
409 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
410 $ AB( KV+1, J+JB ), LDAB-1 )
417 CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
418 $ JB, -ONE, WORK31, LDWORK,
419 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
420 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
426 * Copy the lower triangle of A13 into the work array
431 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
435 * Update A13 in the work array
437 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
438 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
445 CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
446 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
447 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
455 CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
456 $ JB, -ONE, WORK31, LDWORK, WORK13,
457 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
460 * Copy the lower triangle of A13 back into place
464 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
470 * Adjust the pivot indices.
472 DO 160 I = J, J + JB - 1
473 IPIV( I ) = IPIV( I ) + J - 1
477 * Partially undo the interchanges in the current block to
478 * restore the upper triangular form of A31 and copy the upper
479 * triangle of A31 back into place
481 DO 170 JJ = J + JB - 1, J, -1
482 JP = IPIV( JJ ) - JJ + 1
485 * Apply interchange to columns J to JJ-1
487 IF( JP+JJ-1.LT.J+KL ) THEN
489 * The interchange does not affect A31
491 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
492 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
495 * The interchange does affect A31
497 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
498 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
502 * Copy the current column of A31 back into place
504 NW = MIN( I3, JJ-J+1 )
506 $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
507 $ AB( KV+KL+1-JJ+J, JJ ), 1 )