1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
39 /* This implementation is completely wrong. I'll rewrite this */
44 #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
46 static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
59 for (js = 0; js < m; js += 2){
92 is = ((m - js - 2) >> 1);
122 is = ((m - js - 2) & 1);
144 static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
157 for (js = 0; js < m; js += 2){
173 for (is = 0; is < js; is += 2){
212 for (is = 0; is < js; is += 2){
235 static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
242 FLOAT a11, a21, a31, a41;
243 FLOAT a12, a22, a32, a42;
250 for (js = 0; js < m; js += 2){
292 is = ((m - js - 2) >> 1);
365 static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
372 FLOAT a11, a21, a31, a41;
373 FLOAT a12, a22, a32, a42;
380 for (js = 0; js < m; js += 2){
396 for (is = 0; is < js; is += 2){
458 for (is = 0; is < js; is += 2){
489 static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
496 FLOAT a11, a21, a31, a41;
497 FLOAT a12, a22, a32, a42;
504 for (js = 0; js < m; js += 2){
544 is = ((m - js - 2) >> 1);
616 static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
623 FLOAT a11, a21, a31, a41;
624 FLOAT a12, a22, a32, a42;
631 for (js = 0; js < m; js += 2){
647 for (is = 0; is < js; is += 2){
707 for (is = 0; is < js; is += 2){
738 static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
745 FLOAT a11, a21, a31, a41;
746 FLOAT a12, a22, a32, a42;
753 for (js = 0; js < m; js += 2){
793 is = ((m - js - 2) >> 1);
865 static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
872 FLOAT a11, a21, a31, a41;
873 FLOAT a12, a22, a32, a42;
880 for (js = 0; js < m; js += 2){
896 for (is = 0; is < js; is += 2){
956 for (is = 0; is < js; is += 2){
987 static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1000 for (js = 0; js < m; js += 2){
1033 is = ((m - js - 2) >> 1);
1063 is = ((m - js - 2) & 1);
1085 static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1098 for (js = 0; js < m; js += 2){
1131 is = ((m - js - 2) >> 1);
1161 is = ((m - js - 2) & 1);
1183 static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1196 for (js = 0; js < m; js += 2){
1212 for (is = 0; is < js; is += 2){
1251 for (is = 0; is < js; is += 2){
1273 static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1286 for (js = 0; js < m; js += 2){
1302 for (is = 0; is < js; is += 2){
1341 for (is = 0; is < js; is += 2){
1363 static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1370 FLOAT a11, a21, a31, a41;
1371 FLOAT a12, a22, a32, a42;
1378 for (js = 0; js < m; js += 2){
1420 is = ((m - js - 2) >> 1);
1493 static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1500 FLOAT a11, a21, a31, a41;
1501 FLOAT a12, a22, a32, a42;
1508 for (js = 0; js < m; js += 2){
1550 is = ((m - js - 2) >> 1);
1623 static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1630 FLOAT a11, a21, a31, a41;
1631 FLOAT a12, a22, a32, a42;
1638 for (js = 0; js < m; js += 2){
1654 for (is = 0; is < js; is += 2){
1716 for (is = 0; is < js; is += 2){
1747 static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1754 FLOAT a11, a21, a31, a41;
1755 FLOAT a12, a22, a32, a42;
1762 for (js = 0; js < m; js += 2){
1778 for (is = 0; is < js; is += 2){
1840 for (is = 0; is < js; is += 2){