From: Kyrylo Tkachov Date: Mon, 13 Dec 2021 14:13:21 +0000 (+0000) Subject: aarch64: Add memmove expansion for +mops X-Git-Tag: upstream/12.2.0~2764 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bb768f8b45aa7ccf12774aa0c00b295032ee7c47;p=platform%2Fupstream%2Fgcc.git aarch64: Add memmove expansion for +mops This second patch in the series adds an inline movmem expansion for TARGET_MOPS that emits the recommended sequence. A new param aarch64-mops-memmove-size-threshold is added to control the memmove size threshold for this expansion. Its default value is zero to be consistent with the current behaviour where we always emit a libcall, as we don't currently have a movmem inline expansion (we should add a compatible-everywhere inline expansion, but that's for the future), so we should always prefer to emit the MOPS sequence when available in lieu of a libcall. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64.md (aarch64_movmemdi): Define. (movmemdi): Define. (unspec): Add UNSPEC_MOVMEM. * config/aarch64/aarch64.opt (aarch64-mops-memmove-size-threshold): New param. gcc/testsuite/ChangeLog: * gcc.target/aarch64/mops_2.c: New test. --- diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index d623c1b..b71c171 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -189,6 +189,7 @@ UNSPEC_LD3_LANE UNSPEC_LD4_LANE UNSPEC_MB + UNSPEC_MOVMEM UNSPEC_NOP UNSPEC_PACIA1716 UNSPEC_PACIB1716 @@ -1603,6 +1604,52 @@ } ) +(define_insn "aarch64_movmemdi" + [(parallel [ + (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0)) + (clobber (match_operand:DI 0 "register_operand" "+&r")) + (clobber (match_operand:DI 1 "register_operand" "+&r")) + (set (mem:BLK (match_dup 0)) + (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_MOVMEM))])] + "TARGET_MOPS" + "cpyp\t[%x0]!, [%x1]!, %x2!\;cpym\t[%x0]!, [%x1]!, %x2!\;cpye\t[%x0]!, [%x1]!, %x2!" 
+ [(set_attr "length" "12")] +) + +;; 0 is dst +;; 1 is src +;; 2 is size of copy in bytes +;; 3 is alignment + +(define_expand "movmemdi" + [(match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "general_operand") + (match_operand:DI 3 "immediate_operand")] + "TARGET_MOPS" +{ + rtx sz_reg = operands[2]; + /* For constant-sized memmoves check the threshold. + FIXME: We should add a non-MOPS memmove expansion for smaller, + constant-sized memmove to avoid going to a libcall. */ + if (CONST_INT_P (sz_reg) + && INTVAL (sz_reg) < aarch64_mops_memmove_size_threshold) + FAIL; + + rtx addr_dst = XEXP (operands[0], 0); + rtx addr_src = XEXP (operands[1], 0); + + if (!REG_P (sz_reg)) + sz_reg = force_reg (DImode, sz_reg); + if (!REG_P (addr_dst)) + addr_dst = force_reg (DImode, addr_dst); + if (!REG_P (addr_src)) + addr_src = force_reg (DImode, addr_src); + emit_insn (gen_aarch64_movmemdi (addr_dst, addr_src, sz_reg)); + DONE; +} +) + ;; 0 is dst ;; 1 is val ;; 2 is size of copy in bytes diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 7445ed1..33788ff 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -284,3 +284,7 @@ Target Joined UInteger Var(aarch64_loop_vect_issue_rate_niters) Init(6) IntegerR -param=aarch64-mops-memcpy-size-threshold= Target Joined UInteger Var(aarch64_mops_memcpy_size_threshold) Init(256) Param Constant memcpy size in bytes above which to start using MOPS sequence. + +-param=aarch64-mops-memmove-size-threshold= +Target Joined UInteger Var(aarch64_mops_memmove_size_threshold) Init(0) Param +Constant memmove size in bytes above which to start using MOPS sequence. 
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_2.c b/gcc/testsuite/gcc.target/aarch64/mops_2.c new file mode 100644 index 0000000..6fda4dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mops_2.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8.6-a+mops --param=aarch64-mops-memmove-size-threshold=0" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <stdlib.h> + +/* We want to inline variable-sized memmove. +** do_it_mov: +** cpyp \[x1\]\!, \[x0\]\!, x2\! +** cpym \[x1\]\!, \[x0\]\!, x2\! +** cpye \[x1\]\!, \[x0\]\!, x2\! +** ret +*/ +void do_it_mov (char * in, char * out, size_t size) +{ + __builtin_memmove (out, in, size); +} + +/* +** do_it_mov_large: +** mov x2, 1024 +** cpyp \[x1\]\!, \[x0\]!, x2\! +** cpym \[x1\]\!, \[x0\]!, x2\! +** cpye \[x1\]\!, \[x0\]\!, x2\! +** ret +*/ +void do_it_mov_large (char * in, char * out) +{ + __builtin_memmove (out, in, 1024); +} + +/* +** do_it_mov_127: +** mov x2, 127 +** cpyp \[x1\]\!, \[x0\]!, x2\! +** cpym \[x1\]\!, \[x0\]!, x2\! +** cpye \[x1\]\!, \[x0\]\!, x2\! +** ret +*/ +void do_it_mov_127 (char * in, char * out) +{ + __builtin_memmove (out, in, 127); +} + +/* +** do_it_mov_128: +** mov x2, 128 +** cpyp \[x1\]\!, \[x0\]!, x2\! +** cpym \[x1\]\!, \[x0\]!, x2\! +** cpye \[x1\]\!, \[x0\]\!, x2\! +** ret +*/ +void do_it_mov_128 (char * in, char * out) +{ + __builtin_memmove (out, in, 128); +} +