2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/constraints.md (vsb): New constraint.
+ (vsm): Generalize description.
+ * config/aarch64/iterators.md (SVE_INT_BINARY_IMM): New code
+ iterator.
+ (sve_imm_con): Handle smax, smin, umax and umin.
+ (sve_imm_prefix): New code attribute.
+ * config/aarch64/predicates.md (aarch64_sve_vsb_immediate)
+ (aarch64_sve_vsb_operand): New predicates.
+ (aarch64_sve_mul_immediate): Rename to...
+ (aarch64_sve_vsm_immediate): ...this.
+ (aarch64_sve_mul_operand): Rename to...
+ (aarch64_sve_vsm_operand): ...this.
+ * config/aarch64/aarch64-sve.md (mul<mode>3): Generalize to...
+ (<SVE_INT_BINARY_IMM:optab><SVE_I:mode>3): ...this.
+ (*mul<mode>3, *post_ra_mul<mode>3): Generalize to...
+ (*<SVE_INT_BINARY_IMM:optab><SVE_I:mode>3)
+ (*post_ra_<SVE_INT_BINARY_IMM:optab><SVE_I:mode>3): ...these and
+ add movprfx support for the immediate alternatives.
+ (<su><maxmin><mode>3, *<su><maxmin><mode>3): Delete in favor
+ of the above.
+ (*<SVE_INT_BINARY_SD:optab><SVE_SDI:mode>3): Fix incorrect predicate
+ for operand 3.
+
+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+
* config/aarch64/predicates.md (aarch64_simd_imm_one): New predicate.
* config/aarch64/aarch64-sve.md (*cnot<mode>): New pattern.
(*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise.
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
-;; ---- [INT] Multiplication
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts
-;; ---- [INT] Maximum and minimum
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
-;; Includes merging patterns for:
-;; - ADD
-;; - AND
-;; - EOR
+;; Includes:
+;; - ADD (merging form only)
+;; - AND (merging form only)
+;; - EOR (merging form only)
;; - MUL
-;; - ORR
+;; - ORR (merging form only)
;; - SMAX
;; - SMIN
-;; - SUB
+;; - SUB (merging form only)
;; - UMAX
;; - UMIN
;; -------------------------------------------------------------------------
+;; Unpredicated integer binary operations that have an immediate form.
+;; Operand 2 is either a register or an immediate accepted by the
+;; aarch64_sve_<sve_imm_con>_operand predicate chosen for the rtx code.
+;; The expander supplies the governing predicate itself as operand 3
+;; (via aarch64_ptrue_reg) and wraps the operation in UNSPEC_PRED_X.
+(define_expand "<optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 3)
+ (SVE_INT_BINARY_IMM:SVE_I
+ (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Integer binary operations that have an immediate form, predicated
+;; with a PTRUE. We don't actually need the predicate for the first
+;; and third alternatives, but using Upa or X isn't likely to gain much
+;; and would make the instruction seem less uniform to the register
+;; allocator.
+;;
+;; The alternatives are, in order:
+;; 1. immediate operand 3, operand 2 tied to the destination
+;; (output "#", i.e. always split after reload);
+;; 2. register operand 3, operand 2 tied to the destination;
+;; 3. immediate operand 3, operand 2 in any other register
+;; (output "#", split after reload; needs MOVPRFX);
+;; 4. register operand 3, operand 2 in any other register
+;; (MOVPRFX followed by the predicated instruction).
+;;
+;; The split fires only once reload has completed and operand 3 is not
+;; a register; it replaces the insn with the plain unpredicated rtx
+;; operation on operands 2 and 3.
+(define_insn_and_split "*<optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY_IMM:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ "@
+ #
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && !register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
+ ""
+ [(set_attr "movprfx" "*,*,yes,yes")]
+)
+
+;; Unpredicated binary operations with a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+;;
+;; The first alternative has operand 1 tied to the destination. The
+;; second allows operand 1 in a different register and copies it into
+;; the destination with MOVPRFX first. The immediate is printed using
+;; the operand modifier selected by <sve_imm_prefix> for the rtx code.
+(define_insn "*post_ra_<optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (SVE_INT_BINARY_IMM:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "0, w")
+ (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+ "TARGET_SVE && reload_completed"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated integer operations with merging.
(define_expand "cond_<optab><mode>"
[(set (match_operand:SVE_I 0 "register_operand")
)
;; -------------------------------------------------------------------------
-;; ---- [INT] Multiplication
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - MUL
-;; -------------------------------------------------------------------------
-
-;; Unpredicated multiplication.
-(define_expand "mul<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (mult:SVE_I
- (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
- UNSPEC_PRED_X))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Multiplication predicated with a PTRUE. We don't actually need the
-;; predicate for the first alternative, but using Upa or X isn't likely
-;; to gain much and would make the instruction seem less uniform to the
-;; register allocator.
-(define_insn_and_split "*mul<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I
- (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
- (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
- UNSPEC_PRED_X))]
- "TARGET_SVE"
- "@
- #
- mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && !register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
- ""
- [(set_attr "movprfx" "*,*,yes")]
-)
-
-;; Unpredicated multiplications by a constant (post-RA only).
-;; These are generated by splitting a predicated instruction whose
-;; predicate is unused.
-(define_insn "*post_ra_mul<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (mult:SVE_I
- (match_operand:SVE_I 1 "register_operand" "0")
- (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
- "TARGET_SVE && reload_completed"
- "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
-)
-
-;; Merging forms are handled through SVE_INT_BINARY.
-
-;; -------------------------------------------------------------------------
;; ---- [INT] Highpart multiplication
;; -------------------------------------------------------------------------
;; Includes:
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(SVE_INT_BINARY_SD:SVE_SDI
(match_operand:SVE_SDI 2 "register_operand" "0, w, w")
- (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
+ (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
"@
)
;; -------------------------------------------------------------------------
-;; ---- [INT] Maximum and minimum
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - SMAX
-;; - SMIN
-;; - UMAX
-;; - UMIN
-;; -------------------------------------------------------------------------
-
-;; Unpredicated integer MAX/MIN.
-(define_expand "<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
- UNSPEC_PRED_X))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Integer MAX/MIN predicated with a PTRUE.
-(define_insn "*<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))]
- UNSPEC_PRED_X))]
- "TARGET_SVE"
- "@
- <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Merging forms are handled through SVE_INT_BINARY.
-
-;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes post-RA forms of:
arithmetic instructions."
(match_operand 0 "aarch64_sve_arith_immediate"))
+;; Immediate constraint for SVE UMAX and UMIN. All checking is delegated
+;; to the aarch64_sve_vsb_immediate predicate.
+(define_constraint "vsb"
+ "@internal
+ A constraint that matches an immediate operand valid for SVE UMAX
+ and UMIN operations."
+ (match_operand 0 "aarch64_sve_vsb_immediate"))
+
(define_constraint "vsc"
"@internal
A constraint that matches a signed immediate operand valid for SVE
(define_constraint "vsm"
"@internal
- A constraint that matches an immediate operand valid for SVE MUL
- operations."
- (match_operand 0 "aarch64_sve_mul_immediate"))
+ A constraint that matches an immediate operand valid for SVE MUL,
+ SMAX and SMIN operations."
+ (match_operand 0 "aarch64_sve_vsm_immediate"))
(define_constraint "vsA"
"@internal
;; SVE integer binary division operations.
(define_code_iterator SVE_INT_BINARY_SD [div udiv])
+;; SVE integer binary operations that have an immediate form.
+;; Each code listed here needs an entry in sve_imm_con (the immediate
+;; constraint to use) and sve_imm_prefix (the modifier used to print
+;; the immediate).
+(define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin])
+
;; SVE floating-point operations with an unpredicated all-register form.
(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
(mult "fmul")])
;; The SVE immediate constraint to use for an rtl code.
-(define_code_attr sve_imm_con [(eq "vsc")
+(define_code_attr sve_imm_con [(mult "vsm")
+ (smax "vsm")
+ (smin "vsm")
+ (umax "vsb")
+ (umin "vsb")
+ (eq "vsc")
(ne "vsc")
(lt "vsc")
(ge "vsc")
(geu "vsd")
(gtu "vsd")])
+;; The prefix letter to use when printing an immediate operand.
+;; It is spliced into the output template as "%<sve_imm_prefix>2", so an
+;; empty string prints the operand with no modifier.
+;; NOTE(review): "D" presumably makes the unsigned UMAX/UMIN immediate
+;; print as a non-negative value -- confirm against the operand printer.
+(define_code_attr sve_imm_prefix [(mult "")
+ (smax "")
+ (smin "")
+ (umax "D")
+ (umin "D")])
+
;; -------------------------------------------------------------------
;; Int Iterators.
;; -------------------------------------------------------------------
(and (match_code "const,const_vector")
(match_test "aarch64_sve_bitmask_immediate_p (op)")))
-(define_predicate "aarch64_sve_mul_immediate"
+;; Used for SVE UMAX and UMIN.
+;; Accepts a const_vector whose elements are all the same value: in the
+;; range [-128, 127] when the element mode is QImode, and in the range
+;; [0, 255] for every other element mode.
+;; NOTE(review): the QImode range is presumably signed because byte
+;; constants are stored sign-extended, so unsigned 128..255 show up as
+;; negative values -- confirm.
+(define_predicate "aarch64_sve_vsb_immediate"
+ (and (match_code "const_vector")
+ (match_test "GET_MODE_INNER (GET_MODE (op)) == QImode
+ ? aarch64_const_vec_all_same_in_range_p (op, -128, 127)
+ : aarch64_const_vec_all_same_in_range_p (op, 0, 255)")))
+
+;; Used for SVE MUL, SMAX and SMIN.
+(define_predicate "aarch64_sve_vsm_immediate"
(and (match_code "const,const_vector")
(match_test "aarch64_const_vec_all_same_in_range_p (op, -128, 127)")))
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_simd_rshift_imm")))
-(define_predicate "aarch64_sve_mul_operand"
+;; A register, or an immediate accepted by aarch64_sve_vsb_immediate
+;; (the form used for SVE UMAX and UMIN).
+(define_predicate "aarch64_sve_vsb_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "aarch64_sve_vsb_immediate")))
+
+;; A register, or an immediate accepted by aarch64_sve_vsm_immediate
+;; (the form used for SVE MUL, SMAX and SMIN).
+(define_predicate "aarch64_sve_vsm_operand"
(ior (match_operand 0 "register_operand")
- (match_operand 0 "aarch64_sve_mul_immediate")))
+ (match_operand 0 "aarch64_sve_vsm_immediate")))
(define_predicate "aarch64_sve_cmp_vsc_operand"
(ior (match_operand 0 "register_operand")
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/smax_1.c: New test.
+ * gcc.target/aarch64/sve/smin_1.c: Likewise.
+ * gcc.target/aarch64/sve/umax_1.c: Likewise.
+ * gcc.target/aarch64/sve/umin_1.c: Likewise.
+
+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/cnot_1.c: New test.
* gcc.target/aarch64/sve/cond_cnot_1.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_1_run.c: Likewise.
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define varith_TYPE_reg, which replaces each dst[i] with the larger of
+ dst[i] and src[i]. */
+#define DO_REGREG_OPS(TYPE) \
+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \
+}
+
+/* Define varithimm_NAME_TYPE, which replaces each dst[i] with the larger
+ of dst[i] and (TYPE) VALUE. NAME is an identifier-safe spelling of
+ VALUE (for example minus1 for -1). */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] > (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \
+}
+
+/* Instantiate the register form and one immediate form per constant.
+ 141 and -141 do not fit in int8_t; the .b scans below expect them to
+ appear as #-115 and #115 respectively. */
+#define DO_ARITH_OPS(TYPE) \
+ DO_REGREG_OPS (TYPE); \
+ DO_IMMEDIATE_OPS (0, TYPE, 0); \
+ DO_IMMEDIATE_OPS (86, TYPE, 86); \
+ DO_IMMEDIATE_OPS (109, TYPE, 109); \
+ DO_IMMEDIATE_OPS (141, TYPE, 141); \
+ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \
+ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \
+ DO_IMMEDIATE_OPS (-141, TYPE, minus141);
+
+DO_ARITH_OPS (int8_t)
+DO_ARITH_OPS (int16_t)
+DO_ARITH_OPS (int32_t)
+DO_ARITH_OPS (int64_t)
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define varith_TYPE_reg, which replaces each dst[i] with the smaller of
+ dst[i] and src[i]. */
+#define DO_REGREG_OPS(TYPE) \
+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] < src[i] ? dst[i] : src[i]; \
+}
+
+/* Define varithimm_NAME_TYPE, which replaces each dst[i] with the smaller
+ of dst[i] and (TYPE) VALUE. NAME is an identifier-safe spelling of
+ VALUE (for example minus1 for -1). */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \
+}
+
+/* Instantiate the register form and one immediate form per constant.
+ 141 and -141 do not fit in int8_t; the .b scans below expect them to
+ appear as #-115 and #115 respectively. */
+#define DO_ARITH_OPS(TYPE) \
+ DO_REGREG_OPS (TYPE); \
+ DO_IMMEDIATE_OPS (0, TYPE, 0); \
+ DO_IMMEDIATE_OPS (86, TYPE, 86); \
+ DO_IMMEDIATE_OPS (109, TYPE, 109); \
+ DO_IMMEDIATE_OPS (141, TYPE, 141); \
+ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \
+ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \
+ DO_IMMEDIATE_OPS (-141, TYPE, minus141);
+
+DO_ARITH_OPS (int8_t)
+DO_ARITH_OPS (int16_t)
+DO_ARITH_OPS (int32_t)
+DO_ARITH_OPS (int64_t)
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define varith_TYPE_reg, which replaces each dst[i] with the larger of
+ dst[i] and src[i]. */
+#define DO_REGREG_OPS(TYPE) \
+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \
+}
+
+/* Define varithimm_VALUE_TYPE, which replaces each dst[i] with the larger
+ of dst[i] and (TYPE) VALUE. VALUE is pasted into the function name,
+ so it must be a non-negative literal. */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE) \
+void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] > (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \
+}
+
+/* Instantiate the register form and one immediate form per constant.
+ 256 is outside the immediate range the scans below accept for every
+ element size; for uint8_t it wraps to 0. */
+#define DO_ARITH_OPS(TYPE) \
+ DO_REGREG_OPS (TYPE); \
+ DO_IMMEDIATE_OPS (2, TYPE); \
+ DO_IMMEDIATE_OPS (86, TYPE); \
+ DO_IMMEDIATE_OPS (109, TYPE); \
+ DO_IMMEDIATE_OPS (141, TYPE); \
+ DO_IMMEDIATE_OPS (229, TYPE); \
+ DO_IMMEDIATE_OPS (255, TYPE); \
+ DO_IMMEDIATE_OPS (256, TYPE);
+
+DO_ARITH_OPS (uint8_t)
+DO_ARITH_OPS (uint16_t)
+DO_ARITH_OPS (uint32_t)
+DO_ARITH_OPS (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */
+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define varith_TYPE_reg, which replaces each dst[i] with the smaller of
+ dst[i] and src[i]. */
+#define DO_REGREG_OPS(TYPE) \
+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] < src[i] ? dst[i] : src[i]; \
+}
+
+/* Define varithimm_VALUE_TYPE, which replaces each dst[i] with the smaller
+ of dst[i] and (TYPE) VALUE. VALUE is pasted into the function name,
+ so it must be a non-negative literal. */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE) \
+void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \
+{ \
+ for (int i = 0; i < count; ++i) \
+ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \
+}
+
+/* Instantiate the register form and one immediate form per constant.
+ 256 is outside the immediate range the scans below accept for every
+ element size; for uint8_t it wraps to 0. */
+#define DO_ARITH_OPS(TYPE) \
+ DO_REGREG_OPS (TYPE); \
+ DO_IMMEDIATE_OPS (2, TYPE); \
+ DO_IMMEDIATE_OPS (86, TYPE); \
+ DO_IMMEDIATE_OPS (109, TYPE); \
+ DO_IMMEDIATE_OPS (141, TYPE); \
+ DO_IMMEDIATE_OPS (229, TYPE); \
+ DO_IMMEDIATE_OPS (255, TYPE); \
+ DO_IMMEDIATE_OPS (256, TYPE);
+
+DO_ARITH_OPS (uint8_t)
+DO_ARITH_OPS (uint16_t)
+DO_ARITH_OPS (uint32_t)
+DO_ARITH_OPS (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */
+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */