BUILTIN(__builtin_amdgcn_rsq_clampf, "ff", "nc")
BUILTIN(__builtin_amdgcn_sinf, "ff", "nc")
BUILTIN(__builtin_amdgcn_cosf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_logf, "ff", "nc")
BUILTIN(__builtin_amdgcn_log_clampf, "ff", "nc")
BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc")
BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc")
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
return EmitAMDGPUDispatchPtr(*this, E);
+ case AMDGPU::BI__builtin_amdgcn_logf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
case AMDGPU::BI__builtin_amdgcn_log_clampf:
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
case AMDGPU::BI__builtin_amdgcn_ldexp:
*out = __builtin_amdgcn_cosf(a);
}
+// Smoke test: the builtin should lower straight to the target intrinsic,
+// with no libm call or expansion in between.
+// CHECK-LABEL: @test_log_f32
+// CHECK: call float @llvm.amdgcn.log.f32
+void test_log_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_logf(a);
+}
+
// CHECK-LABEL: @test_log_clamp_f32
// CHECK: call float @llvm.amdgcn.log.clamp.f32
void test_log_clamp_f32(global float* out, float a)
improves the interaction between AMDGPU buffer operations and the LLVM memory
model, and so the non `.ptr` intrinsics are deprecated.
+* Added the llvm.amdgcn.log intrinsic (overloaded on f32 and f16). This
+  provides direct access to v_log_f32 and v_log_f16.
+
Changes to the ARM Backend
--------------------------
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
+// v_log_{f16|f32}: computes log2(x). The f32 version does not handle
+// denormal inputs. There is little reason to use this for f16, since
+// v_log_f16 does support denormals; prefer the generic llvm.log2
+// intrinsic for f16.
+def int_amdgcn_log : DefaultAttrsIntrinsic<
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+>;
+
// log2 with a clamped result (llvm.amdgcn.log.clamp).
// NOTE(review): the exact clamp semantics (e.g. behavior for zero or
// denormal inputs) are not visible here -- confirm against the ISA docs.
def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
+ NODE_NAME_CASE(LOG)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
NODE_NAME_CASE(FP_CLASS)
RSQ,
RCP_LEGACY,
RCP_IFLAG,
+
+ // log2, no denormal handling for f32.
+ LOG,
+
FMUL_LEGACY,
RSQ_CLAMP,
FP_CLASS,
// out = 1.0 / a
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+// out = log2(a); selects to v_log_f32, which does not handle f32 denormals
+def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
+
// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
(AMDGPUcos_impl node:$src)]>;
// Match either the explicit intrinsic or the lowered ISD node form.
def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
                                             (AMDGPUfract_impl node:$src)]>;
+// f32 selection: match the explicit intrinsic, the AMDGPUISD::LOG node
+// produced by lowering, or the generic flog2.
+def AMDGPUlog : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+                                           (AMDGPUlog_impl node:$src),
+                                           (flog2 node:$src)]>;
+// f16 selection: no ISD node form; match the intrinsic or generic flog2.
+def AMDGPUlogf16 : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+                                              (flog2 node:$src)]>;
+
def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_class node:$src0, node:$src1),
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_log_clamp:
+ case Intrinsic::amdgcn_log:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_sqrt:
>;
// R600 IEEE log2. Selected via the AMDGPUlog PatFrags so the
// llvm.amdgcn.log intrinsic, not just generic flog2, maps onto it.
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
-  inst, "LOG_IEEE", flog2
+  inst, "LOG_IEEE", AMDGPUlog
> {
  let Itinerary = TransALU;
}
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_trig_preop:
+ case Intrinsic::amdgcn_log:
return true;
default:
break;
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
-defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
+defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
-defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>;
+defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
ret float %canonicalized
}
+; The canonicalize should fold away: the checks expect only v_log_f32
+; between the waitcnt and the return, with no extra canonicalization op.
+; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_log:
+; GCN: s_waitcnt
+; GCN-NEXT: v_log_f32
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_amdgcn_log(float %a) {
+  %log = call float @llvm.amdgcn.log.f32(float %a)
+  %canonicalized = call float @llvm.canonicalize.f32(float %log)
+  ret float %canonicalized
+}
+
; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; GCN: .amd_amdgpu_isa
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
declare float @llvm.amdgcn.cubeid(float, float, float) #0
declare float @llvm.amdgcn.frexp.mant.f32(float) #0
+declare float @llvm.amdgcn.log.f32(float) #0
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL %s
+
+; Selection tests for llvm.amdgcn.log (f32 and f16) under both SelectionDAG
+; and GlobalISel, including folding of fabs/fneg into source modifiers.
+define float @v_log_f32(float %src) {
+; GCN-LABEL: v_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %log = call float @llvm.amdgcn.log.f32(float %src)
+  ret float %log
+}
+
+; fabs folds into the |v0| source modifier (v_log_f32_e64).
+define float @v_fabs_log_f32(float %src) {
+; GCN-LABEL: v_fabs_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e64 v0, |v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call float @llvm.fabs.f32(float %src)
+  %log = call float @llvm.amdgcn.log.f32(float %fabs.src)
+  ret float %log
+}
+
+; fneg(fabs) folds into the combined -|v0| source modifier.
+define float @v_fneg_fabs_log_f32(float %src) {
+; GCN-LABEL: v_fneg_fabs_log_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f32_e64 v0, -|v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call float @llvm.fabs.f32(float %src)
+  %neg.fabs.src = fneg float %fabs.src
+  %log = call float @llvm.amdgcn.log.f32(float %neg.fabs.src)
+  ret float %log
+}
+
+; The f16 overload selects v_log_f16.
+define half @v_log_f16(half %src) {
+; GCN-LABEL: v_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %log = call half @llvm.amdgcn.log.f16(half %src)
+  ret half %log
+}
+
+; fabs folds into the |v0| source modifier (v_log_f16_e64).
+define half @v_fabs_log_f16(half %src) {
+; GCN-LABEL: v_fabs_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e64 v0, |v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call half @llvm.fabs.f16(half %src)
+  %log = call half @llvm.amdgcn.log.f16(half %fabs.src)
+  ret half %log
+}
+
+; fneg(fabs) folds into the combined -|v0| modifier on v_log_f16_e64.
+define half @v_fneg_fabs_log_f16(half %src) {
+; GCN-LABEL: v_fneg_fabs_log_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_log_f16_e64 v0, -|v0|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.src = call half @llvm.fabs.f16(half %src)
+  %neg.fabs.src = fneg half %fabs.src
+  %log = call half @llvm.amdgcn.log.f16(half %neg.fabs.src)
+  ret half %log
+}
+
+declare half @llvm.amdgcn.log.f16(half) #0
+declare float @llvm.amdgcn.log.f32(float) #0
+declare float @llvm.fabs.f32(float) #0
+declare half @llvm.fabs.f16(half) #0
+
+attributes #0 = { nounwind readnone speculatable willreturn }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}