AMDGPU: Fix fast math log2 f32
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 14 Aug 2023 23:37:32 +0000 (19:37 -0400)
committerTobias Hieta <tobias@hieta.se>
Mon, 21 Aug 2023 07:54:24 +0000 (09:54 +0200)
Apparently afn doesn't allow you to drop the denormal handling
according to OpenCL conformance. This was hidden by losing the flags
during the library linking process. Fast log is still broken and needs
more work.

https://reviews.llvm.org/D157936
(cherry picked from commit e09b3593ba64d004a9d2b3fa41be2ba84f968a88)

llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/llvm.log2.ll

index 0a7ae20..7c749e2 100644 (file)
@@ -980,8 +980,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                              half). Not implemented for double. Hardware provides
                                              1ULP accuracy for float, and 0.51ULP for half. Float
                                              instruction does not natively support denormal
-                                             inputs. Backend will optimize out denormal scaling if
-                                             marked with the :ref:`afn <fastmath_afn>` flag.
+                                             inputs.
 
   :ref:`llvm.sqrt <int_sqrt>`                Implemented for double, float and half (and vectors).
 
index 254d02d..fc82fb6 100644 (file)
@@ -2528,7 +2528,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
 std::pair<SDValue, SDValue>
 AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
                                         SDValue Src, SDNodeFlags Flags) const {
-  if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+  if (!needsDenormHandlingF32(DAG, Src, Flags))
     return {};
 
   MVT VT = MVT::f32;
index 120c00b..9325b14 100644 (file)
@@ -3037,8 +3037,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
 std::pair<Register, Register>
 AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
                                        unsigned Flags) const {
-  if (allowApproxFunc(B.getMF(), Flags) ||
-      !needsDenormHandlingF32(B.getMF(), Src, Flags))
+  if (!needsDenormHandlingF32(B.getMF(), Src, Flags))
     return {};
 
   const LLT F32 = LLT::scalar(32);
index f5e30d6..6485f13 100644 (file)
@@ -1511,17 +1511,59 @@ define float @v_log2_fneg_f32(float %in) {
 }
 
 define float @v_log2_f32_fast(float %in) {
-; GFX689-LABEL: v_log2_f32_fast:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_fast:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_fast:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_fast:
 ; R600:       ; %bb.0:
@@ -1537,17 +1579,59 @@ define float @v_log2_f32_fast(float %in) {
 }
 
 define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_unsafe_math_attr:
 ; R600:       ; %bb.0:
@@ -1563,17 +1647,59 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 }
 
 define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_approx_fn_attr:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_approx_fn_attr:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_approx_fn_attr:
 ; R600:       ; %bb.0:
@@ -1657,17 +1783,59 @@ define float @v_log2_f32_ninf(float %in) {
 }
 
 define float @v_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_log2_f32_afn:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_afn:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_afn:
 ; R600:       ; %bb.0:
@@ -1709,17 +1877,59 @@ define float @v_log2_f32_afn_daz(float %in) #0 {
 }
 
 define float @v_log2_f32_afn_dynamic(float %in) #1 {
-; GFX689-LABEL: v_log2_f32_afn_dynamic:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_afn_dynamic:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_afn_dynamic:
 ; R600:       ; %bb.0:
@@ -1735,17 +1945,61 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 }
 
 define float @v_fabs_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_fabs_log2_f32_afn:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e64 v0, |v0|
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_fabs_log2_f32_afn:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e64 v0, |v0|
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e64 v0, |v0|, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_fabs_log2_f32_afn:
 ; R600:       ; %bb.0: