[NVPTX] Support neg{.ftz} for f16 and f16x2

author Jakub Chlanda <j.chlanda@gmail.com>

Thu, 13 Oct 2022 17:40:00 +0000 (10:40 -0700)

committer Artem Belevich <tra@google.com>

Thu, 13 Oct 2022 17:48:33 +0000 (10:48 -0700)
author Jakub Chlanda <j.chlanda@gmail.com>
Thu, 13 Oct 2022 17:40:00 +0000 (10:40 -0700)
committer Artem Belevich <tra@google.com>
Thu, 13 Oct 2022 17:48:33 +0000 (10:48 -0700)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

index b88a20a..090bf01 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -564,9 +564,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
      setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
    }
  
-  // There's no neg.f16 instruction. Expand to (0-x).
-  setOperationAction(ISD::FNEG, MVT::f16, Expand);
-  setOperationAction(ISD::FNEG, MVT::v2f16, Expand);
+  // neg.f16/neg.f16x2 require PTX ISA 6.0+ and sm_53+.
+  const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
+                                        STI.getPTXVersion() >= 60 &&
+                                        STI.allowFP16Math();
+  for (const auto &VT : {MVT::f16, MVT::v2f16})
+    setOperationAction(ISD::FNEG, VT,
+                       IsFP16FP16x2NegAvailable ? Legal : Expand);
  
    // (would be) Library functions.
  
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

index a4c67cd..d9c3e36 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -922,6 +922,19 @@ defm FNEG  : F2<"neg", fneg>;
  defm FSQRT : F2<"sqrt.rn", fsqrt>;
  
  //
+// F16 / F16x2 NEG
+//
+class FNEG_F16_F16X2<string OpcStr, RegisterClass RC, Predicate Pred> :
+      NVPTXInst<(outs RC:$dst), (ins RC:$src),
+                !strconcat(OpcStr, " \t$dst, $src;"),
+                [(set RC:$dst, (fneg RC:$src))]>,
+                Requires<[useFP16Math, hasPTX60, hasSM53, Pred]>;
+def FNEG16_ftz   : FNEG_F16_F16X2<"neg.ftz.f16", Float16Regs, doF32FTZ>;
+def FNEG16       : FNEG_F16_F16X2<"neg.f16", Float16Regs, True>;
+def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", Float16x2Regs, doF32FTZ>;
+def FNEG16x2     : FNEG_F16_F16X2<"neg.f16x2", Float16x2Regs, True>;
+
+//
  // F64 division
  //
  def FDIV641r :
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll

index 4f59c44..c64af05 100644 (file)
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -1,27 +1,29 @@
  ; ## Full FP16 support enabled by default.
  ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
  ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:          -mattr=+ptx60                                                 \
  ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
  ; RUN: %if ptxas %{                                                           \
  ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
  ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:          -mattr=+ptx60                                                 \
  ; RUN:   | %ptxas-verify -arch=sm_53                                          \
  ; RUN: %}
  ; ## Full FP16 with FTZ
  ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
  ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN:          -denormal-fp-math-f32=preserve-sign \
+; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
  ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
  ; RUN: %if ptxas %{                                                           \
  ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
  ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN:          -denormal-fp-math-f32=preserve-sign                           \
+; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
  ; RUN:   | %ptxas-verify -arch=sm_53                                          \
  ; RUN: %}
  ; ## FP16 support explicitly disabled.
  ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
  ; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
-; RUN:           -verify-machineinstrs \
+; RUN:          -verify-machineinstrs -mattr=+ptx60                         \
  ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
  ; RUN: %if ptxas %{                                                           \
  ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
@@ -1168,5 +1170,24 @@ define half @test_fmuladd(half %a, half %b, half %c) #0 {
    ret half %r
  }
  
+; CHECK-LABEL: test_neg_f16(
+; CHECK-F16-NOFTZ: neg.f16
+; CHECK-F16-FTZ: neg.ftz.f16
+; CHECK-NOF16: xor.b16         %rs{{.*}}, %rs{{.*}}, -32768
+define half @test_neg_f16(half noundef %arg) #0 {
+  %res = fneg half %arg
+  ret half %res
+}
+
+; CHECK-LABEL: test_neg_f16x2(
+; CHECK-F16-NOFTZ: neg.f16x2
+; CHECK-F16-FTZ: neg.ftz.f16x2
+; CHECK-NOF16: xor.b16         %rs{{.*}}, %rs{{.*}}, -32768
+; CHECK-NOF16: xor.b16         %rs{{.*}}, %rs{{.*}}, -32768
+define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
+  %res = fneg <2 x half> %arg
+  ret <2 x half> %res
+}
+
  attributes #0 = { nounwind }
  attributes #1 = { "unsafe-fp-math" = "true" }
author	Jakub Chlanda <j.chlanda@gmail.com>
	Thu, 13 Oct 2022 17:40:00 +0000 (10:40 -0700)
committer	Artem Belevich <tra@google.com>
	Thu, 13 Oct 2022 17:48:33 +0000 (10:48 -0700)
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/NVPTX/f16-instructions.ll		patch \| blob \| history