dtrmm_utcopy SVE function
author    Bine Brank <binebrank@gmail.com>
          Sat, 13 Nov 2021 17:48:53 +0000 (18:48 +0100)
committer Bine Brank <binebrank@gmail.com>
          Sat, 13 Nov 2021 17:48:53 +0000 (18:48 +0100)
kernel/arm64/trmm_utcopy_sve_v1.c [new file with mode: 0644]

diff --git a/kernel/arm64/trmm_utcopy_sve_v1.c b/kernel/arm64/trmm_utcopy_sve_v1.c
new file mode 100644 (file)
index 0000000..e44e673
--- /dev/null
@@ -0,0 +1,128 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin.           */
+/* All rights reserved.                                              */
+/*                                                                   */
+/* Redistribution and use in source and binary forms, with or        */
+/* without modification, are permitted provided that the following   */
+/* conditions are met:                                               */
+/*                                                                   */
+/*   1. Redistributions of source code must retain the above         */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer.                                                  */
+/*                                                                   */
+/*   2. Redistributions in binary form must reproduce the above      */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer in the documentation and/or other materials       */
+/*      provided with the distribution.                              */
+/*                                                                   */
+/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
+/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
+/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
+/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
+/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
+/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
+/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
+/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
+/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
+/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
+/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
+/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
+/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
+/*    POSSIBILITY OF SUCH DAMAGE.                                    */
+/*                                                                   */
+/* The views and conclusions contained in the software and           */
+/* documentation are those of the authors and should not be          */
+/* interpreted as representing official policies, either expressed   */
+/* or implied, of The University of Texas at Austin.                 */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){
+
+    BLASLONG i, js;
+    BLASLONG X;
+
+    int sve_len = svcntd();
+
+    FLOAT *ao;
+    js = 0;
+    svbool_t pn = svwhilelt_b64(js, n);            /* predicate for the current block of columns */
+    int n_active = svcntp_b64(svptrue_b64(), pn);  /* width of that block (at most the vector length) */
+    do
+    {
+        X = posX;
+
+        /* start of the current panel inside the stored triangle */
+        if (posX <= posY) {
+            ao = a + posX + posY * lda;
+        } else {
+            ao = a + posY + posX * lda;
+        }
+
+        i = 0;
+        do
+        {
+            if (X < posY) { // optimize this: unroll over DGEMM_UNROLL_M: vl
+                /* block that is skipped: advance the pointers, the packed entries stay untouched */
+                ao ++;
+                b += n_active;
+                X ++;
+                i ++;
+            } else if (X > posY) {
+                /* full block: copy one vector of n_active contiguous elements */
+                svfloat64_t aj_vec = svld1(pn, ao);
+                svst1(pn, b, aj_vec);
+                ao += lda;
+                b += n_active;
+                X ++;
+                i ++;
+            } else {
+#ifdef UNIT
+                /* diagonal block, unit diagonal: copy the strict upper triangle of the block,
+                 * force ones on the diagonal, zero-fill below it
+                 * (a standalone scalar sketch of this packing follows the diff) */
+                int temp = 0;
+                for (int j = 0; j < n_active; j++) {
+                    for (int k = 0; k < j; k++) {
+                        b[temp++] = *(ao + j * lda + k);
+                    }
+                    b[temp++] = ONE;
+                    for (int k = j + 1; k < n_active; k++) {
+                        b[temp++] = ZERO;
+                    }
+                }
+#else
+                /* diagonal block: copy the triangle including the diagonal, zero-fill the rest */
+                int temp = 0;
+                for (int j = 0; j < n_active; j++) {
+                    for (int k = 0; k <= j; k++) {
+                        b[temp++] = *(ao + j * lda + k);
+                    }
+                    for (int k = j + 1; k < n_active; k++) {
+                        b[temp++] = ZERO;
+                    }
+                }
+#endif
+                ao += n_active * lda;
+                b += n_active * n_active;
+                X += n_active;
+                i += n_active;
+            }
+        } while (i < m);
+
+        /* advance to the next block of up to VL columns */
+        posY += n_active;
+        js += n_active;
+        pn = svwhilelt_b64(js, n);
+        n_active = svcntp_b64(svptrue_b64(), pn);
+    } while (svptest_any(svptrue_b64(), pn));
+
+    return 0;
+}
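
For reference, the diagonal-block branch above packs, for each column of the block, the stored part of the upper triangle followed by explicit zero fill (with ONE forced onto the diagonal when UNIT is defined). The standalone sketch below is not part of the commit: it is a plain scalar model of that packing for a hypothetical 4x4 non-UNIT diagonal block (FLOAT taken as double, posX == posY, a fixed width of 4 standing in for n_active), so the resulting order of the packed buffer b can be printed and inspected.

/* Illustration only -- scalar model of the X == posY (non-UNIT) packing above. */
#include <stdio.h>

#define N 4   /* stands in for n_active */

int main(void) {
    double a[N * N];   /* column-major, lda == N, upper triangle stored */
    double b[N * N];   /* packed buffer, filled the way the kernel fills it */

    /* build a recognisable upper-triangular block: a(row,col) = 10*(row+1) + (col+1) */
    for (int col = 0; col < N; col++)
        for (int row = 0; row < N; row++)
            a[row + col * N] = (row <= col) ? (double)(10 * (row + 1) + col + 1) : 0.0;

    /* same loop structure as the non-UNIT branch:
       copy rows 0..j of column j, then zero-fill rows j+1..N-1 */
    int temp = 0;
    for (int j = 0; j < N; j++) {
        for (int k = 0; k <= j; k++) b[temp++] = a[k + j * N];
        for (int k = j + 1; k < N; k++) b[temp++] = 0.0;
    }

    /* print the packed buffer, one group of N entries per line */
    for (int j = 0; j < N; j++) {
        for (int k = 0; k < N; k++) printf("%6.1f ", b[j * N + k]);
        printf("\n");
    }
    return 0;
}

Each printed row is one packed group of N entries: group j holds column j of the block's upper triangle followed by zeros, which is the layout the SVE kernel produces per n_active-wide diagonal block.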