Correct dates in changelog from earlier commit.
2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/arm.c (neon_builtin_data): Add vfma and vfms
builtins.
* config/arm/neon-docgen.ml (intrinsic_groups): Add
fused-multiply-* groups.
* config/neon-gen.ml (print_feature_test_start): New function.
(print_feature_test_end): Likewise.
(print_variant): Print feature test macros.
* config/arm/neon-testgen.ml (emit_prologue): Allow different
tests to require different effective targets.
(effective_target): New function.
(test_intrinsic): Specify correct effective targets.
* gcc/config/arm/neon.md (fma<VCVTF:mode>4_intrinsic): New pattern.
(fmsub<VCVTF:mode>4_intrinsic): Likewise.
(neon_vfma<VCVFT:mode>): New expand.
(neon_vfms<VCVFT:mode>): Likewise.
* config/neon.ml (opcode): Add Vfma and Vfms.
(features): Add Requires_feature.
(ops): Add VFMA and VFMS intrinsics.
* config/arm/arm_neon.h: Regenerate.
* doc/arm-neon-intrinsics.texi: Likewise.
2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* gcc.target/arm/neon/vfmaQf32.c: New testcase.
* gcc.target/arm/neon/vfmaf32.c: Likewise.
* gcc.target/arm/neon/vfmsQf32.c: Likewise.
* gcc.target/arm/neon/vfmsf32.c: Likewise.
Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
From-SVN: r192560
+2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+
+ * config/arm/arm.c (neon_builtin_data): Add vfma and vfms
+ builtins.
+ * config/arm/neon-docgen.ml (intrinsic_groups): Add
+ fused-multiply-* groups.
+ * config/neon-gen.ml (print_feature_test_start): New function.
+ (print_feature_test_end): Likewise.
+ (print_variant): Print feature test macros.
+ * config/arm/neon-testgen.ml (emit_prologue): Allow different
+ tests to require different effective targets.
+ (effective_target): New function.
+ (test_intrinsic): Specify correct effective targets.
+ * gcc/config/arm/neon.md (fma<VCVTF:mode>4_intrinsic): New pattern.
+ (fmsub<VCVTF:mode>4_intrinsic): Likewise.
+ (neon_vfma<VCVFT:mode>): New expand.
+ (neon_vfms<VCVFT:mode>): Likewise.
+ * config/neon.ml (opcode): Add Vfma and Vfms.
+ (features): Add Requires_feature.
+ (ops): Add VFMA and VFMS intrinsics.
+ * config/arm/arm_neon.h: Regenerate.
+ * doc/arm-neon-intrinsics.texi: Likewise.
+
2012-10-18 Richard Guenther <rguenther@suse.de>
* lto-streamer.h (enum LTO_tags): Add LTO_integer_cst.
(streamer_pack_tree_bitfields): Call it.
(streamer_write_integer_cst): Adjust.
-2012-10-17 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
+2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config.gcc: Add support for ARMv8 for arm*-*-* targets.
VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
+ VAR2 (TERNOP, vfma, v2sf, v4sf),
+ VAR2 (TERNOP, vfms, v2sf, v4sf),
VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1);
}
+#ifdef __ARM_FEATURE_FMA
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
+{
+ return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c, 3);
+}
+
+#endif
+#ifdef __ARM_FEATURE_FMA
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
+{
+ return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c, 3);
+}
+
+#endif
+#ifdef __ARM_FEATURE_FMA
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
+{
+ return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c, 3);
+}
+
+#endif
+#ifdef __ARM_FEATURE_FMA
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
+{
+ return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c, 3);
+}
+
+#endif
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
"Multiplication", single_opcode Vmul;
"Multiply-accumulate", single_opcode Vmla;
"Multiply-subtract", single_opcode Vmls;
+ "Fused-multiply-accumulate", single_opcode Vfma;
+ "Fused-multiply-subtract", single_opcode Vfms;
"Subtraction", single_opcode Vsub;
"Comparison (equal-to)", single_opcode Vceq;
"Comparison (greater-than-or-equal-to)", single_opcode Vcge;
| _ -> None
with Not_found -> None
+let print_feature_test_start features =
+ try
+ match List.find (fun feature ->
+ match feature with Requires_feature _ -> true
+ | _ -> false)
+ features with
+ Requires_feature feature ->
+ Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
+ | _ -> assert false
+ with Not_found -> assert true
+
+let print_feature_test_end features =
+ let feature =
+ List.exists (function Requires_feature x -> true
+ | _ -> false) features in
+ if feature then Format.printf "#endif@\n"
+
+
let print_variant opcode features shape name (ctype, asmtype, elttype) =
let bits = infoword_value elttype features in
let modesuf = mode_suffix elttype shape in
return ctype builtin in
let body = pdecls @ rdecls @ stmts
and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
- print_function ctype fnname body
+ begin
+ print_feature_test_start features;
+ print_function ctype fnname body;
+ print_feature_test_end features;
+ end
(* When this function processes the element types in the ops table, it rewrites
them in a list of tuples (a,b,c):
failwith ("Could not create test source file " ^ name ^ ": " ^ str)
(* Emit prologue code to a test source file. *)
-let emit_prologue chan test_name =
+let emit_prologue chan test_name effective_target =
Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name;
Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n";
Printf.fprintf chan "/* { dg-do assemble } */\n";
- Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n";
+ Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n"
+ effective_target;
Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
- Printf.fprintf chan "/* { dg-add-options arm_neon } */\n";
+ Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target;
Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
Printf.fprintf chan "void test_%s (void)\n{\n" test_name
then (Const :: flags, String.sub ty 6 ((String.length ty) - 6))
else (flags, ty)) tys'
+(* Work out what the effective target should be. *)
+let effective_target features =
+ try
+ match List.find (fun feature ->
+ match feature with Requires_feature _ -> true
+ | _ -> false)
+ features with
+ Requires_feature "FMA" -> "arm_neonv2"
+ | _ -> assert false
+ with Not_found -> "arm_neon"
+
(* Given an intrinsic shape, produce a regexp that will match
the right-hand sides of instructions generated by an intrinsic of
that shape. *)
"!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n")
(analyze_all_shapes features shape analyze_shape)
in
+ let effective_target = effective_target features
+ in
(* Emit file and function prologues. *)
- emit_prologue chan test_name;
+ emit_prologue chan test_name effective_target;
(* Emit local variable declarations. *)
emit_automatics chan c_types features;
Printf.fprintf chan "\n";
)
;; Fused multiply-accumulate
+;; We define each insn twice here:
+;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
+;; to be able to use when converting to FMA.
+;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
(define_insn "fma<VCVTF:mode>4"
[(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
(const_string "neon_fp_vmla_qqq")))]
)
+(define_insn "fma<VCVTF:mode>4_intrinsic"
+ [(set (match_operand:VCVTF 0 "register_operand" "=w")
+ (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+ "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq")))]
+)
+
(define_insn "*fmsub<VCVTF:mode>4"
[(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
(const_string "neon_fp_vmla_qqq")))]
)
+(define_insn "fmsub<VCVTF:mode>4_intrinsic"
+ [(set (match_operand:VCVTF 0 "register_operand" "=w")
+ (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+ "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq")))]
+)
+
(define_insn "ior<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
DONE;
})
+(define_expand "neon_vfma<VCVTF:mode>"
+ [(match_operand:VCVTF 0 "s_register_operand")
+ (match_operand:VCVTF 1 "s_register_operand")
+ (match_operand:VCVTF 2 "s_register_operand")
+ (match_operand:VCVTF 3 "s_register_operand")
+ (match_operand:SI 4 "immediate_operand")]
+ "TARGET_NEON && TARGET_FMA"
+{
+ emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
+ operands[1]));
+ DONE;
+})
+
+(define_expand "neon_vfms<VCVTF:mode>"
+ [(match_operand:VCVTF 0 "s_register_operand")
+ (match_operand:VCVTF 1 "s_register_operand")
+ (match_operand:VCVTF 2 "s_register_operand")
+ (match_operand:VCVTF 3 "s_register_operand")
+ (match_operand:SI 4 "immediate_operand")]
+ "TARGET_NEON && TARGET_FMA"
+{
+ emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
+ operands[1]));
+ DONE;
+})
+
; Used for intrinsics when flag_unsafe_math_optimizations is false.
(define_insn "neon_vmla<mode>_unspec"
| Vmul
| Vmla
| Vmls
+ | Vfma
+ | Vfms
| Vsub
| Vceq
| Vcge
| Const_valuator of (int -> int)
| Fixed_vector_reg
| Fixed_core_reg
+ (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
+ | Requires_feature of string
exception MixedMode of elts * elts
Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
+ (* Fused-multiply-accumulate. *)
+ Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32];
+ Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32];
+ Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32];
+ Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32];
+
(* Subtraction. *)
Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
-2012-10-17 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
+2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
+
+ * gcc.target/arm/neon/vfmaQf32.c: New testcase.
+ * gcc.target/arm/neon/vfmaf32.c: Likewise.
+ * gcc.target/arm/neon/vfmsQf32.c: Likewise.
+ * gcc.target/arm/neon/vfmsf32.c: Likewise.
+
+2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* gcc.target/arm/ftest-armv8a-arm.c: New testcase.
* gcc.target/arm/ftest-armv8a-thumb.c: Likewise.
--- /dev/null
+/* Test the `vfmaQf32' ARM Neon intrinsic. */
+/* This file was autogenerated by neon-testgen. */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neonv2 } */
+
+#include "arm_neon.h"
+
+void test_vfmaQf32 (void)
+{
+ float32x4_t out_float32x4_t;
+ float32x4_t arg0_float32x4_t;
+ float32x4_t arg1_float32x4_t;
+ float32x4_t arg2_float32x4_t;
+
+ out_float32x4_t = vfmaq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t);
+}
+
+/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* Test the `vfmaf32' ARM Neon intrinsic. */
+/* This file was autogenerated by neon-testgen. */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neonv2 } */
+
+#include "arm_neon.h"
+
+void test_vfmaf32 (void)
+{
+ float32x2_t out_float32x2_t;
+ float32x2_t arg0_float32x2_t;
+ float32x2_t arg1_float32x2_t;
+ float32x2_t arg2_float32x2_t;
+
+ out_float32x2_t = vfma_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t);
+}
+
+/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* Test the `vfmsQf32' ARM Neon intrinsic. */
+/* This file was autogenerated by neon-testgen. */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neonv2 } */
+
+#include "arm_neon.h"
+
+void test_vfmsQf32 (void)
+{
+ float32x4_t out_float32x4_t;
+ float32x4_t arg0_float32x4_t;
+ float32x4_t arg1_float32x4_t;
+ float32x4_t arg2_float32x4_t;
+
+ out_float32x4_t = vfmsq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t);
+}
+
+/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* Test the `vfmsf32' ARM Neon intrinsic. */
+/* This file was autogenerated by neon-testgen. */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neonv2 } */
+
+#include "arm_neon.h"
+
+void test_vfmsf32 (void)
+{
+ float32x2_t out_float32x2_t;
+ float32x2_t arg0_float32x2_t;
+ float32x2_t arg1_float32x2_t;
+ float32x2_t arg2_float32x2_t;
+
+ out_float32x2_t = vfms_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t);
+}
+
+/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */