THUNDERX2T99: Bug Fixes in D/Z NRM2 and ZGEMM
author Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Tue, 28 Feb 2017 09:11:38 +0000 (01:11 -0800)
committer Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Tue, 28 Feb 2017 09:11:38 +0000 (01:11 -0800)
kernel/arm64/dznrm2_thunderx2t99.c
kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S

index cf2e86e..a6613d7 100644 (file)
@@ -301,7 +301,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
        : "cc",
          "memory",
          "x0", "x1", "x2", "x3", "x4", "x5",
-         "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
+         "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8"
        );
 
 }
index 55e0178..e5b4cba 100644 (file)
@@ -474,19 +474,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        ld2     {v2.2d, v3.2d}, [pA]
        add     pA, pA, #32
 
-       OP_rr   v20.2d, v0.2d, v8.d[0]
-       OP_ii   v20.2d, v1.2d, v8.d[1]
-       OP_ri   v21.2d, v0.2d, v8.d[1]
-       OP_ir   v21.2d, v1.2d, v8.d[0]
+       OP_rr   v20.2d, v0.2d, v9.d[0]
+       OP_ii   v20.2d, v1.2d, v9.d[1]
+       OP_ri   v21.2d, v0.2d, v9.d[1]
+       OP_ir   v21.2d, v1.2d, v9.d[0]
 
        ldr     q10, [pB]
        ldr     q11, [pB, #16]
        add     pB, pB, #32
 
-       OP_rr   v18.2d, v2.2d, v9.d[0]
-       OP_ii   v18.2d, v3.2d, v9.d[1]
-       OP_ri   v19.2d, v2.2d, v9.d[1]
-       OP_ir   v19.2d, v3.2d, v9.d[0]
+       OP_rr   v18.2d, v2.2d, v8.d[0]
+       OP_ii   v18.2d, v3.2d, v8.d[1]
+       OP_ri   v19.2d, v2.2d, v8.d[1]
+       OP_ir   v19.2d, v3.2d, v8.d[0]
 
        prfm    PLDL1KEEP, [pB, #B_PRE_SIZE]