From: Bine Brank Date: Sat, 13 Nov 2021 17:48:53 +0000 (+0100) Subject: dtrmm_utcopy sve function X-Git-Tag: upstream/0.3.21~7^2~21^2~9 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7d996b1c365f43fe37fd2127d95c2a82d76f3e2e;p=platform%2Fupstream%2Fopenblas.git dtrmm_utcopy sve function --- diff --git a/kernel/arm64/trmm_utcopy_sve_v1.c b/kernel/arm64/trmm_utcopy_sve_v1.c new file mode 100644 index 00000000..e44e6737 --- /dev/null +++ b/kernel/arm64/trmm_utcopy_sve_v1.c @@ -0,0 +1,128 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" + +#ifdef __ARM_FEATURE_SVE +#include +#endif + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ + + BLASLONG i, js, j; + BLASLONG X; + + int sve_len = svcntd(); + + FLOAT *ao; + js = 0; + svbool_t pn = svwhilelt_b64(js, n); + int n_active = svcntp_b64(svptrue_b64(), pn); + do + { + X = posX; + + if (posX <= posY) { + ao = a + posX + (posY + j) * lda; + } else { + ao = a + posY + (posX + j) * lda; + } + + i = 0; + /* svbool_t pm = svwhilelt_b64(i, m); */ + /* int m_active = svcntp_b64(svptrue_b64(), pm); */ + do + { + if (X < posY) { // optimize this: unroll over DGEMM_UNROLL_M: vl + ao ++; + b += n_active; + X ++; + i ++; + } else + if (X > posY) { + svfloat64_t aj_vec = svld1(pn, ao); + svst1(pn, b, aj_vec); + ao += lda; + b += n_active; + X ++; + i ++; + } else { +#ifdef UNIT + int temp = 0; + for (int j = 0; j < n_active; j++) { + for (int k = 0 ; k < j; k++) { + b[temp++] = *(ao+j*lda+k); + } + b[temp++] = ONE; + for (int k = j+1; k < n_active; k++) { + b[temp++] = ZERO; + } + } +#else + int temp = 0; + for (int j = 0; j < n_active; j++) { + for (int k = 0 ; k <= j; k++) { + b[temp++] = *(ao+j*lda+k); + } + for (int k = j+1; k < n_active; k++) { + b[temp++] = ZERO; + } + } +#endif + ao += n_active * lda; + b += n_active*n_active; + X += n_active; + i += n_active; + } + } while (i < m); + + //printf("\n"); + + + posY += n_active; + js += n_active; + pn = svwhilelt_b64(js, n); + n_active = svcntp_b64(svptrue_b64(), pn); + } while (svptest_any(svptrue_b64(), pn)); + + return 0; +}