From 742e3a7781c7f29136ccc36673ef2c887ba2860d Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Tue, 14 Feb 2017 15:23:12 -0800 Subject: [PATCH] Add self to aarch64 maintainers. Fix mla instruction. sim/ * MAINTAINERS (aarch64): Add myself. sim/aarch64/ * simulator.c (do_vec_MLA): Rewrite switch body. sim/testsuite/sim/aarch64/ * mla.s: New. --- sim/ChangeLog | 4 ++ sim/MAINTAINERS | 1 + sim/aarch64/ChangeLog | 2 + sim/aarch64/simulator.c | 65 ++++++----------------- sim/testsuite/sim/aarch64/ChangeLog | 2 + sim/testsuite/sim/aarch64/mla.s | 103 ++++++++++++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 49 deletions(-) create mode 100644 sim/testsuite/sim/aarch64/mla.s diff --git a/sim/ChangeLog b/sim/ChangeLog index ba946fd..6d0c5fd 100644 --- a/sim/ChangeLog +++ b/sim/ChangeLog @@ -1,3 +1,7 @@ +2017-02-14 Jim Wilson + + * MAINTAINERS (aarch64): Add myself. + 2016-12-14 Maciej W. Rozycki * MAINTAINERS (Maintainers for particular sims): Add myself as diff --git a/sim/MAINTAINERS b/sim/MAINTAINERS index b373f66..62887d4 100644 --- a/sim/MAINTAINERS +++ b/sim/MAINTAINERS @@ -14,6 +14,7 @@ Mike Frysinger vapier@gentoo.org Maintainers for particular sims: aarch64 Nick Clifton +aarch64 Jim Wilson arm Nick Clifton bfin Mike Frysinger cr16 M R Swami Reddy diff --git a/sim/aarch64/ChangeLog b/sim/aarch64/ChangeLog index 7d00621..e8d66a6 100644 --- a/sim/aarch64/ChangeLog +++ b/sim/aarch64/ChangeLog @@ -1,5 +1,7 @@ 2017-02-14 Jim Wilson + * simulator.c (do_vec_MLA): Rewrite switch body. + * simulator.c (do_vec_bit): Change loop limits from 16 and 8 to 4 and 2. Move test_false if inside loop. Fix logic for computing result stored to vd. diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c index 13a2b1f..7c28219 100644 --- a/sim/aarch64/simulator.c +++ b/sim/aarch64/simulator.c @@ -3799,63 +3799,30 @@ do_vec_MLA (sim_cpu *cpu) switch (INSTR (23, 22)) { case 0: - { - uint16_t a[16], b[16]; - - for (i = 0; i < (full ? 
16 : 8); i++) - { - a[i] = aarch64_get_vec_u8 (cpu, vn, i); - b[i] = aarch64_get_vec_u8 (cpu, vm, i); - } - - for (i = 0; i < (full ? 16 : 8); i++) - { - uint16_t v = aarch64_get_vec_u8 (cpu, vd, i); - - aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i])); - } - } + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vd, i) + + (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i))); return; case 1: - { - uint32_t a[8], b[8]; - - for (i = 0; i < (full ? 8 : 4); i++) - { - a[i] = aarch64_get_vec_u16 (cpu, vn, i); - b[i] = aarch64_get_vec_u16 (cpu, vm, i); - } - - for (i = 0; i < (full ? 8 : 4); i++) - { - uint32_t v = aarch64_get_vec_u16 (cpu, vd, i); - - aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i])); - } - } + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vd, i) + + (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i))); return; case 2: - { - uint64_t a[4], b[4]; - - for (i = 0; i < (full ? 4 : 2); i++) - { - a[i] = aarch64_get_vec_u32 (cpu, vn, i); - b[i] = aarch64_get_vec_u32 (cpu, vm, i); - } - - for (i = 0; i < (full ? 4 : 2); i++) - { - uint64_t v = aarch64_get_vec_u32 (cpu, vd, i); - - aarch64_set_vec_u64 (cpu, vd, i, v + (a[i] * b[i])); - } - } + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vd, i) + + (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i))); return; - case 3: + default: HALT_UNALLOC; } } diff --git a/sim/testsuite/sim/aarch64/ChangeLog b/sim/testsuite/sim/aarch64/ChangeLog index d47abc5..a17b977 100644 --- a/sim/testsuite/sim/aarch64/ChangeLog +++ b/sim/testsuite/sim/aarch64/ChangeLog @@ -1,5 +1,7 @@ 2017-02-14 Jim Wilson + * mla.s: New. + * bit.s: New. * ldn_single.s: New. 
diff --git a/sim/testsuite/sim/aarch64/mla.s b/sim/testsuite/sim/aarch64/mla.s new file mode 100644 index 0000000..e0065e7 --- /dev/null +++ b/sim/testsuite/sim/aarch64/mla.s @@ -0,0 +1,103 @@ +# mach: aarch64 + +# Check the vector multiply add instruction: mla. + +.include "testutils.inc" + +input: + .word 0x04030201 + .word 0x08070605 + .word 0x0c0b0a09 + .word 0x100f0e0d +m8b: + .word 0x110a0502 + .word 0x4132251a +m16b: + .word 0x110a0502 + .word 0x4132251a + .word 0x917a6552 + .word 0x01e2c5aa +m4h: + .word 0x180a0402 + .word 0x70323c1a +m8h: + .word 0x180a0402 + .word 0x70323c1a + .word 0x087ab452 + .word 0xe0e26caa +m2s: + .word 0x140a0402 + .word 0xa46a3c1a +m4s: + .word 0x140a0402 + .word 0xa46a3c1a + .word 0xb52ab452 + .word 0x464b6caa + + start + adrp x0, input + ldr q0, [x0, #:lo12:input] + + movi v1.8b, #1 + mla v1.8b, v0.8b, v0.8b + mov x1, v1.d[0] + adrp x3, m8b + ldr x4, [x3, #:lo12:m8b] + cmp x1, x4 + bne .Lfailure + + movi v1.16b, #1 + mla v1.16b, v0.16b, v0.16b + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m16b + ldr x4, [x3, #:lo12:m16b] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m16b+8] + cmp x2, x5 + bne .Lfailure + + movi v1.4h, #1 + mla v1.4h, v0.4h, v0.4h + mov x1, v1.d[0] + adrp x3, m4h + ldr x4, [x3, #:lo12:m4h] + cmp x1, x4 + bne .Lfailure + + movi v1.8h, #1 + mla v1.8h, v0.8h, v0.8h + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m8h + ldr x4, [x3, #:lo12:m8h] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m8h+8] + cmp x2, x5 + bne .Lfailure + + movi v1.2s, #1 + mla v1.2s, v0.2s, v0.2s + mov x1, v1.d[0] + adrp x3, m2s + ldr x4, [x3, #:lo12:m2s] + cmp x1, x4 + bne .Lfailure + + movi v1.4s, #1 + mla v1.4s, v0.4s, v0.4s + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m4s + ldr x4, [x3, #:lo12:m4s] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m4s+8] + cmp x2, x5 + bne .Lfailure + + pass +.Lfailure: + fail -- 2.7.4