From ca50cb120ba5853e9a56de2151ba47c201ad6be6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 15 Jun 2022 18:51:13 +0200 Subject: [PATCH] [SelectionDAG] Constant fold FP_TO_BF16 and BF16_TO_FP. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 ++++++---- llvm/test/CodeGen/X86/bfloat.ll | 31 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 97777d0..bf222a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4979,9 +4979,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); - case ISD::FP16_TO_FP: { + case ISD::FP16_TO_FP: + case ISD::BF16_TO_FP: { bool Ignored; - APFloat FPV(APFloat::IEEEhalf(), + APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() + : APFloat::BFloat(), (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); // This can return overflow, underflow, or inexact; we don't care. @@ -5055,11 +5057,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: { bool Ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(APFloat::IEEEhalf(), + (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf() + : APFloat::BFloat(), APFloat::rmNearestTiesToEven, &Ignored); return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); } diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll index ab8c59a..404ad09 100644 --- a/llvm/test/CodeGen/X86/bfloat.ll +++ b/llvm/test/CodeGen/X86/bfloat.ll @@ -69,3 +69,34 @@ define void @add_double(ptr %pa, ptr %pb, ptr %pc) { store double %dadd, ptr %pc ret void } + +define void @add_constant(ptr %pa, ptr %pc) { +; CHECK-LABEL: add_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT +; CHECK-NEXT: movw %ax, (%rbx) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %a = load bfloat, ptr %pa + %add = fadd bfloat %a, 1.0 + store bfloat %add, ptr %pc + ret void +} + +define void @store_constant(ptr %pc) { +; CHECK-LABEL: store_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: movw $16256, (%rdi) # imm = 0x3F80 +; CHECK-NEXT: retq + store bfloat 1.0, ptr %pc + ret void +} -- 2.7.4