nvptx: Improved support for HFMode including neghf2 and abshf2

author Roger Sayle <roger@nextmovesoftware.com>

Thu, 3 Feb 2022 08:07:22 +0000 (09:07 +0100)

committer Tom de Vries <tdevries@suse.de>

Thu, 10 Feb 2022 08:01:54 +0000 (09:01 +0100)
author Roger Sayle <roger@nextmovesoftware.com>
Thu, 3 Feb 2022 08:07:22 +0000 (09:07 +0100)
committer Tom de Vries <tdevries@suse.de>
Thu, 10 Feb 2022 08:01:54 +0000 (09:01 +0100)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md

index 7463603..e26d24e 100644 (file)
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -783,6 +783,14 @@
    ""
    "%.\\tsetp%c1\\t%0, %2, %3;")
  
+(define_insn "*cmphf"
+  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+       (match_operator:BI 1 "nvptx_float_comparison_operator"
+          [(match_operand:HF 2 "nvptx_register_operand" "R")
+           (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
+  "TARGET_SM53"
+  "%.\\tsetp%c1\\t%0, %2, %3;")
+
  (define_insn "jump"
    [(set (pc)
         (label_ref (match_operand 0 "" "")))]
@@ -973,6 +981,21 @@
    DONE;
  })
  
+(define_expand "cstorehf4"
+  [(set (match_operand:SI 0 "nvptx_register_operand")
+       (match_operator:SI 1 "nvptx_float_comparison_operator"
+         [(match_operand:HF 2 "nvptx_register_operand")
+          (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
+  "TARGET_SM53"
+{
+  rtx reg = gen_reg_rtx (BImode);
+  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
+                           operands[2], operands[3]);
+  emit_move_insn (reg, cmp);
+  emit_insn (gen_setccsi_from_bi (operands[0], reg));
+  DONE;
+})
+
  ;; Calls
  
  (define_insn "call_insn_<mode>"
@@ -1160,6 +1183,26 @@
    "TARGET_SM53"
    "%.\\tmul.f16\\t%0, %1, %2;")
  
+(define_insn "fmahf4"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+       (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
+               (match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
+               (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
+  "TARGET_SM53"
+  "%.\\tfma%#.f16\\t%0, %1, %2, %3;")
+
+(define_insn "neghf2"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+       (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
+  ""
+  "%.\\txor.b16\\t%0, %1, -32768;")
+
+(define_insn "abshf2"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+       (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
+  ""
+  "%.\\tand.b16\\t%0, %1, 32767;")
+
  (define_insn "exp2hf2"
    [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
         (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
diff --git a/gcc/testsuite/gcc.target/nvptx/float16-3.c b/gcc/testsuite/gcc.target/nvptx/float16-3.c

new file mode 100644 (file)

index 0000000..914282a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/float16-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -misa=sm_53 -mptx=6.3" } */
+
+_Float16 var;
+
+void neg()
+{
+  var = -var;
+}
+
+/* { dg-final { scan-assembler "xor.b16" } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/float16-4.c b/gcc/testsuite/gcc.target/nvptx/float16-4.c

new file mode 100644 (file)

index 0000000..b11f17a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/float16-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -misa=sm_53 -mptx=6.3 -ffast-math" } */
+
+_Float16 var;
+
+void foo()
+{
+  var = (var < (_Float16)0.0) ? -var : var;
+}
+
+/* { dg-final { scan-assembler "and.b16" } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/float16-5.c b/gcc/testsuite/gcc.target/nvptx/float16-5.c

new file mode 100644 (file)

index 0000000..5fe15ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -misa=sm_53 -mptx=6.3 -ffast-math" } */
+
+_Float16 a;
+_Float16 b;
+_Float16 c;
+_Float16 d;
+
+void foo()
+{
+  a = (_Float16)(b*c) + d;
+}
+
+/* { dg-final { scan-assembler "fma.rn.f16" } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/float16-6.c b/gcc/testsuite/gcc.target/nvptx/float16-6.c

new file mode 100644 (file)

index 0000000..8fe4fa3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/float16-6.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -misa=sm_53 -mptx=6.3" } */
+
+_Float16 x;
+_Float16 y;
+
+_Bool eq()
+{
+  return x == y;
+}
+
+_Bool ne()
+{
+  return x != y;
+}
+
+_Bool lt()
+{
+  return x < y;
+}
+
+_Bool le()
+{
+  return x <= y;
+}
+
+_Bool gt()
+{
+  return x > y;  /* Greater-than on the _Float16 globals; counterpart of lt().  */
+}
+
+_Bool ge()
+{
+  return x >= y;
+}
+
+/* { dg-final { scan-assembler-times "setp\.\[a-z\]*\.f16" 6 } } */
+/* { dg-final { scan-assembler-not "cvt.f32.f16" } } */
author	Roger Sayle <roger@nextmovesoftware.com>
	Thu, 3 Feb 2022 08:07:22 +0000 (09:07 +0100)
committer	Tom de Vries <tdevries@suse.de>
	Thu, 10 Feb 2022 08:01:54 +0000 (09:01 +0100)
gcc/config/nvptx/nvptx.md		patch \| blob \| history
gcc/testsuite/gcc.target/nvptx/float16-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/nvptx/float16-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/nvptx/float16-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/nvptx/float16-6.c	[new file with mode: 0644]	patch \| blob