AMDGPU: Add llvm.amdgcn.log intrinsic

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sun, 11 Jun 2023 16:25:30 +0000 (12:25 -0400)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 13 Jun 2023 01:10:30 +0000 (21:10 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sun, 11 Jun 2023 16:25:30 +0000 (12:25 -0400)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 13 Jun 2023 01:10:30 +0000 (21:10 -0400)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def

index 0196100..0ccdc30 100644 (file)
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -100,6 +100,7 @@ BUILTIN(__builtin_amdgcn_rsq_clamp, "dd", "nc")
  BUILTIN(__builtin_amdgcn_rsq_clampf, "ff", "nc")
  BUILTIN(__builtin_amdgcn_sinf, "ff", "nc")
  BUILTIN(__builtin_amdgcn_cosf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_logf, "ff", "nc")
  BUILTIN(__builtin_amdgcn_log_clampf, "ff", "nc")
  BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc")
  BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp

index 852d6f0..421c3d2 100644 (file)
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17171,6 +17171,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
      return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
    case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
      return EmitAMDGPUDispatchPtr(*this, E);
+  case AMDGPU::BI__builtin_amdgcn_logf:
+    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
    case AMDGPU::BI__builtin_amdgcn_log_clampf:
      return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
    case AMDGPU::BI__builtin_amdgcn_ldexp:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl

index 5cf965e..3900a8c 100644 (file)
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -172,6 +172,13 @@ void test_cos_f32(global float* out, float a)
    *out = __builtin_amdgcn_cosf(a);
  }
  
+// CHECK-LABEL: @test_log_f32
+// CHECK: call float @llvm.amdgcn.log.f32
+void test_log_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_logf(a);
+}
+
  // CHECK-LABEL: @test_log_clamp_f32
  // CHECK: call float @llvm.amdgcn.log.clamp.f32
  void test_log_clamp_f32(global float* out, float a)
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst

index 35d982f..4d078cf 100644 (file)
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,9 @@ Changes to the AMDGPU Backend
    improves the interaction between AMDGPU buffer operations and the LLVM memory
    model, and so the non `.ptr` intrinsics are deprecated.
  
+* Added llvm.amdgcn.log.f32 intrinsic. This provides direct access to
+  v_log_f32.
+
  Changes to the ARM Backend
  --------------------------
  
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td

index 82a13d5..b6bdb0d 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -300,6 +300,14 @@ def int_amdgcn_cos : DefaultAttrsIntrinsic<
    [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
  >;
  
+// v_log_{f16|f32}, performs log2. The f32 version does not handle
+// denormals. There is no reason to use this for f16: v_log_f16 does
+// support denormals, so the generic llvm.log2 intrinsic should be
+// preferred there.
+def int_amdgcn_log : DefaultAttrsIntrinsic<
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+>;
+
  def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
    [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
  >;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index f94dc4f..af65ac5 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4687,6 +4687,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    NODE_NAME_CASE(RSQ)
    NODE_NAME_CASE(RCP_LEGACY)
    NODE_NAME_CASE(RCP_IFLAG)
+  NODE_NAME_CASE(LOG)
    NODE_NAME_CASE(FMUL_LEGACY)
    NODE_NAME_CASE(RSQ_CLAMP)
    NODE_NAME_CASE(FP_CLASS)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

index aed99b2..af5979e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -444,6 +444,10 @@ enum NodeType : unsigned {
    RSQ,
    RCP_LEGACY,
    RCP_IFLAG,
+
+  // log2, no denormal handling for f32.
+  LOG,
+
    FMUL_LEGACY,
    RSQ_CLAMP,
    FP_CLASS,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

index 9c01cb6..ece730c 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -115,6 +115,9 @@ def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
  // out = 1.0 / a
  def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
  
+// v_log_f32, which is log2
+def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
+
  // out = 1.0 / sqrt(a)
  def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
  
@@ -385,6 +388,12 @@ def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src),
                                             (AMDGPUcos_impl node:$src)]>;
  def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
                                               (AMDGPUfract_impl node:$src)]>;
+def AMDGPUlog : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+                                           (AMDGPUlog_impl node:$src),
+                                           (flog2 node:$src)]>;
+def AMDGPUlogf16 : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+                                              (flog2 node:$src)]>;
+
  
  def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
    [(int_amdgcn_class node:$src0, node:$src1),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

index 4ffbac0..d0425f8 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4207,6 +4207,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
      case Intrinsic::amdgcn_sin:
      case Intrinsic::amdgcn_cos:
      case Intrinsic::amdgcn_log_clamp:
+    case Intrinsic::amdgcn_log:
      case Intrinsic::amdgcn_rcp:
      case Intrinsic::amdgcn_rcp_legacy:
      case Intrinsic::amdgcn_sqrt:
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td

index b53e9c2..d42f3c5 100644 (file)
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1124,7 +1124,7 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
  >;
  
  class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
-  inst, "LOG_IEEE", flog2
+  inst, "LOG_IEEE", AMDGPUlog
  > {
    let Itinerary = TransALU;
  }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index 6a3a6dd..0bf8f66 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10555,6 +10555,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
      case Intrinsic::amdgcn_rcp_legacy:
      case Intrinsic::amdgcn_rsq_legacy:
      case Intrinsic::amdgcn_trig_preop:
+    case Intrinsic::amdgcn_log:
        return true;
      default:
        break;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td

index 2c3308e..2179dda 100644 (file)
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -322,7 +322,7 @@ defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
  
  let TRANS = 1, SchedRW = [WriteTrans32] in {
  defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
-defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
+defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
  defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
  defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
  defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
@@ -487,7 +487,7 @@ let TRANS = 1, SchedRW = [WriteTrans32] in {
  defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
  defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
  defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
-defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>;
+defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
  defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>;
  defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
  defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll

index 2ba4bba..3c9174e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -872,6 +872,16 @@ define float @v_test_canonicalize_frexp_mant(float %a) {
    ret float %canonicalized
  }
  
+; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_log:
+; GCN: s_waitcnt
+; GCN-NEXT: v_log_f32
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_amdgcn_log(float %a) {
+  %log = call float @llvm.amdgcn.log.f32(float %a)
+  %canonicalized = call float @llvm.canonicalize.f32(float %log)
+  ret float %canonicalized
+}
+
  ; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
  ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
  ; GCN: .amd_amdgpu_isa
@@ -900,6 +910,7 @@ declare double @llvm.maxnum.f64(double, double) #0
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
  declare float @llvm.amdgcn.cubeid(float, float, float) #0
  declare float @llvm.amdgcn.frexp.mant.f32(float) #0
+declare float @llvm.amdgcn.log.f32(float) #0
  
  attributes #0 = { nounwind readnone }
  attributes #1 = { "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll

new file mode 100644 (file)

index 0000000..ebdda38
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL %s
+
+define float @v_log_f32(float %src)  {
+; GCN-LABEL: v_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %log = call float @llvm.amdgcn.log.f32(float %src)
+  ret float %log
+}
+
+define float @v_fabs_log_f32(float %src)  {
+; GCN-LABEL: v_fabs_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e64 v0, |v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call float @llvm.fabs.f32(float %src)
+  %log = call float @llvm.amdgcn.log.f32(float %fabs.src)
+  ret float %log
+}
+
+define float @v_fneg_fabs_log_f32(float %src)  {
+; GCN-LABEL: v_fneg_fabs_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e64 v0, -|v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call float @llvm.fabs.f32(float %src)
+  %neg.fabs.src = fneg float %fabs.src
+  %log = call float @llvm.amdgcn.log.f32(float %neg.fabs.src)
+  ret float %log
+}
+
+define half @v_log_f16(half %src)  {
+; GCN-LABEL: v_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %log = call half @llvm.amdgcn.log.f16(half %src)
+  ret half %log
+}
+
+define half @v_fabs_log_f16(half %src)  {
+; GCN-LABEL: v_fabs_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e64 v0, |v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call half @llvm.fabs.f16(half %src)
+  %log = call half @llvm.amdgcn.log.f16(half %fabs.src)
+  ret half %log
+}
+
+define half @v_fneg_fabs_log_f16(half %src)  {
+; GCN-LABEL: v_fneg_fabs_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e64 v0, -|v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call half @llvm.fabs.f16(half %src)
+  %neg.fabs.src = fneg half %fabs.src
+  %log = call half @llvm.amdgcn.log.f16(half %neg.fabs.src)
+  ret half %log
+}
+
+declare half @llvm.amdgcn.log.f16(half) #0
+declare float @llvm.amdgcn.log.f32(float) #0
+declare float @llvm.fabs.f32(float) #0
+declare half @llvm.fabs.f16(half) #0
+
+attributes #0 = { nounwind readnone speculatable willreturn }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sun, 11 Jun 2023 16:25:30 +0000 (12:25 -0400)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 13 Jun 2023 01:10:30 +0000 (21:10 -0400)
clang/include/clang/Basic/BuiltinsAMDGPU.def		patch \| blob \| history
clang/lib/CodeGen/CGBuiltin.cpp		patch \| blob \| history
clang/test/CodeGenOpenCL/builtins-amdgcn.cl		patch \| blob \| history
llvm/docs/ReleaseNotes.rst		patch \| blob \| history
llvm/include/llvm/IR/IntrinsicsAMDGPU.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/R600Instructions.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/VOP1Instructions.td		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll	[new file with mode: 0644]	patch \| blob