UNSPEC_LD3_LANE
UNSPEC_LD4_LANE
UNSPEC_MB
+ UNSPEC_MOVMEM
UNSPEC_NOP
UNSPEC_PACIA1716
UNSPEC_PACIB1716
}
)
+;; Copy operand 2 bytes from the address in operand 1 to the address in
+;; operand 0 using the Armv8.6 MOPS cpyp/cpym/cpye three-instruction
+;; sequence.  All three registers use the "!" writeback forms, so the
+;; insn destroys its inputs: the size register counts down to zero
+;; (modelled by the set to const_int 0) and both address registers are
+;; advanced past the copied region (modelled by the clobbers).  The
+;; expander must therefore pass registers whose old values are dead
+;; after this insn.  "length" 12 = three 4-byte instructions.
+(define_insn "aarch64_movmemdi"
+ [(parallel [
+ (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+ (clobber (match_operand:DI 0 "register_operand" "+&r"))
+ (clobber (match_operand:DI 1 "register_operand" "+&r"))
+ (set (mem:BLK (match_dup 0))
+ (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_MOVMEM))])]
+ "TARGET_MOPS"
+ "cpyp\t[%x0]!, [%x1]!, %x2!\;cpym\t[%x0]!, [%x1]!, %x2!\;cpye\t[%x0]!, [%x1]!, %x2!"
+ [(set_attr "length" "12")]
+)
+
+;; 0 is dst
+;; 1 is src
+;; 2 is size of copy in bytes
+;; 3 is alignment
+
+;; Expand a memmove through the MOPS aarch64_movmemdi insn.
+(define_expand "movmemdi"
+ [(match_operand:BLK 0 "memory_operand")
+ (match_operand:BLK 1 "memory_operand")
+ (match_operand:DI 2 "general_operand")
+ (match_operand:DI 3 "immediate_operand")]
+ "TARGET_MOPS"
+{
+ rtx sz_reg = operands[2];
+ /* For constant-sized memmoves check the threshold.
+ FIXME: We should add a non-MOPS memmove expansion for smaller,
+ constant-sized memmove to avoid going to a libcall. */
+ if (CONST_INT_P (sz_reg)
+ && INTVAL (sz_reg) < aarch64_mops_memmove_size_threshold)
+ FAIL;
+
+ /* aarch64_movmemdi writes back all three of its register operands:
+ the addresses are post-incremented and the size is decremented to
+ zero. Always copy the operands into fresh pseudos rather than
+ reusing an incoming register directly; a register that is still
+ live after the copy would otherwise have its value corrupted
+ (the memmove analogue of PR111121). */
+ rtx addr_dst = copy_to_mode_reg (DImode, XEXP (operands[0], 0));
+ rtx addr_src = copy_to_mode_reg (DImode, XEXP (operands[1], 0));
+ sz_reg = copy_to_mode_reg (DImode, sz_reg);
+
+ emit_insn (gen_aarch64_movmemdi (addr_dst, addr_src, sz_reg));
+ DONE;
+}
+)
+
;; 0 is dst
;; 1 is val
;; 2 is size of copy in bytes
-param=aarch64-mops-memcpy-size-threshold=
Target Joined UInteger Var(aarch64_mops_memcpy_size_threshold) Init(256) Param
Constant memcpy size in bytes above which to start using MOPS sequence.
+
+; Counterpart of aarch64-mops-memcpy-size-threshold for memmove.
+; NOTE(review): Init(0) (memcpy uses 256) means every constant-size
+; memmove uses MOPS when available -- there is currently no non-MOPS
+; inline memmove expansion, so a larger threshold would force a libcall
+; (see the FIXME in the movmemdi expander).
+
+-param=aarch64-mops-memmove-size-threshold=
+Target Joined UInteger Var(aarch64_mops_memmove_size_threshold) Init(0) Param
+Constant memmove size in bytes above which to start using MOPS sequence.
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops --param=aarch64-mops-memmove-size-threshold=0" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdlib.h>
+
+/* We want to inline variable-sized memmove.
+** do_it_mov:
+** cpyp \[x1\]\!, \[x0\]\!, x2\!
+** cpym \[x1\]\!, \[x0\]\!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+/* size is not a compile-time constant, so the constant-size threshold
+   check in the movmemdi expander does not apply: the copy must always
+   expand to the MOPS sequence (dst in x1, src in x0, size in x2).  */
+void do_it_mov (char * in, char * out, size_t size)
+{
+ __builtin_memmove (out, in, size);
+}
+
+/*
+** do_it_mov_large:
+** mov x2, 1024
+** cpyp \[x1\]\!, \[x0\]\!, x2\!
+** cpym \[x1\]\!, \[x0\]\!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+/* Constant 1024-byte memmove: with the threshold param set to 0 this
+   must inline as the MOPS sequence rather than calling memmove.
+   (Escape "!" consistently in the body regexes, matching do_it_mov.)  */
+void do_it_mov_large (char * in, char * out)
+{
+ __builtin_memmove (out, in, 1024);
+}
+
+/*
+** do_it_mov_127:
+** mov x2, 127
+** cpyp \[x1\]\!, \[x0\]\!, x2\!
+** cpym \[x1\]\!, \[x0\]\!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+/* 127 is below the default memcpy-style 256-byte threshold, but the
+   memmove threshold param is 0 here, so it must still inline.
+   (Escape "!" consistently in the body regexes, matching do_it_mov.)  */
+void do_it_mov_127 (char * in, char * out)
+{
+ __builtin_memmove (out, in, 127);
+}
+
+/*
+** do_it_mov_128:
+** mov x2, 128
+** cpyp \[x1\]\!, \[x0\]\!, x2\!
+** cpym \[x1\]\!, \[x0\]\!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+/* Boundary companion to do_it_mov_127: 128 bytes must also inline
+   with the threshold param set to 0.
+   (Escape "!" consistently in the body regexes, matching do_it_mov.)  */
+void do_it_mov_128 (char * in, char * out)
+{
+ __builtin_memmove (out, in, 128);
+}
+