setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
}
- // There's no neg.f16 instruction. Expand to (0-x).
- setOperationAction(ISD::FNEG, MVT::f16, Expand);
- setOperationAction(ISD::FNEG, MVT::v2f16, Expand);
+ // neg.f16/neg.f16x2 need PTX 60+ and SM_53+ (and f16 math); else expand.
+ const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
+ STI.getPTXVersion() >= 60 &&
+ STI.allowFP16Math();
+ for (const auto &VT : {MVT::f16, MVT::v2f16})
+ setOperationAction(ISD::FNEG, VT,
+ IsFP16FP16x2NegAvailable ? Legal : Expand);
// (would be) Library functions.
defm FSQRT : F2<"sqrt.rn", fsqrt>;
//
+// F16 NEG: neg{.ftz}.f16 and neg{.ftz}.f16x2 (PTX 60+, SM_53+, f16 math).
+//
+class FNEG_F16_F16X2<string OpcStr, RegisterClass RC, Predicate Pred> :
+ NVPTXInst<(outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcStr, " \t$dst, $src;"),
+ [(set RC:$dst, (fneg RC:$src))]>,
+ Requires<[useFP16Math, hasPTX60, hasSM53, Pred]>;
+def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", Float16Regs, doF32FTZ>;
+def FNEG16 : FNEG_F16_F16X2<"neg.f16", Float16Regs, True>;
+def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", Float16x2Regs, doF32FTZ>;
+def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", Float16x2Regs, True>;
+
+//
// F64 division
//
def FDIV641r :
; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: -mattr=+ptx60 \
; RUN: | %ptxas-verify -arch=sm_53 \
; RUN: %}
; ## Full FP16 with FTZ
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: -denormal-fp-math-f32=preserve-sign \
+; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: -denormal-fp-math-f32=preserve-sign \
+; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
; RUN: | %ptxas-verify -arch=sm_53 \
; RUN: %}
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
-; RUN: -verify-machineinstrs \
+; RUN: -verify-machineinstrs -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
ret half %r
}
+; Scalar half fneg: expect neg.f16 (neg.ftz.f16 in the FTZ run); when f16
+; math is disabled, expect a 16-bit xor with -32768, i.e. a sign-bit flip.
+; CHECK-LABEL: test_neg_f16(
+; CHECK-F16-NOFTZ: neg.f16
+; CHECK-F16-FTZ: neg.ftz.f16
+; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
+define half @test_neg_f16(half noundef %arg) #0 {
+ %res = fneg half %arg
+ ret half %res
+}
+
+; <2 x half> fneg: expect neg.f16x2 (neg.ftz.f16x2 in the FTZ run); when f16
+; math is disabled, expect one 16-bit sign-bit xor per element (two in total).
+; CHECK-LABEL: test_neg_f16x2(
+; CHECK-F16-NOFTZ: neg.f16x2
+; CHECK-F16-FTZ: neg.ftz.f16x2
+; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
+; CHECK-NOF16: xor.b16 %rs{{.*}}, %rs{{.*}}, -32768
+define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
+ %res = fneg <2 x half> %arg
+ ret <2 x half> %res
+}
+
attributes #0 = { nounwind }
attributes #1 = { "unsafe-fp-math" = "true" }