From: Matt Arsenault Date: Tue, 15 Jul 2014 17:23:46 +0000 (+0000) Subject: Add codegen for more R600 builtins X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8587711164cf478d438074b22ef887a2717a5096;p=platform%2Fupstream%2Fllvm.git Add codegen for more R600 builtins llvm-svn: 213079 --- diff --git a/clang/include/clang/Basic/BuiltinsR600.def b/clang/include/clang/Basic/BuiltinsR600.def index e0812d7..49135cc 100644 --- a/clang/include/clang/Basic/BuiltinsR600.def +++ b/clang/include/clang/Basic/BuiltinsR600.def @@ -16,5 +16,17 @@ BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n") BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n") +BUILTIN(__builtin_amdgpu_div_fmas, "dddd", "nc") +BUILTIN(__builtin_amdgpu_div_fmasf, "ffff", "nc") +BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc") +BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc") +BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc") +BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc") +BUILTIN(__builtin_amdgpu_rcp, "dd", "nc") +BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgpu_rsq, "dd", "nc") +BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc") +BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc") +BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc") #undef BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0f1a146..3734716 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6007,6 +6007,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } +// Emit an intrinsic that has 1 float or double operand. +static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); +} + +// Emit an intrinsic that has 3 float or double operands.
+static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); +} + Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { @@ -6037,7 +6059,30 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); FlagStore->setAlignment(FlagOutPtr.second); return Result; - } default: + } + case R600::BI__builtin_amdgpu_div_fmas: + case R600::BI__builtin_amdgpu_div_fmasf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas); + case R600::BI__builtin_amdgpu_div_fixup: + case R600::BI__builtin_amdgpu_div_fixupf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); + case R600::BI__builtin_amdgpu_trig_preop: + case R600::BI__builtin_amdgpu_trig_preopf: { + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Value *Src1 = EmitScalarExpr(E->getArg(1)); + Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_trig_preop, Src0->getType()); + return Builder.CreateCall2(F, Src0, Src1); + } + case R600::BI__builtin_amdgpu_rcp: + case R600::BI__builtin_amdgpu_rcpf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); + case R600::BI__builtin_amdgpu_rsq: + case R600::BI__builtin_amdgpu_rsqf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); + case R600::BI__builtin_amdgpu_rsq_clamped: + case R600::BI__builtin_amdgpu_rsq_clampedf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); + default: return nullptr; } } diff --git a/clang/test/CodeGenOpenCL/builtins-r600.cl b/clang/test/CodeGenOpenCL/builtins-r600.cl index 0531038..cebeba1 100644 --- 
a/clang/test/CodeGenOpenCL/builtins-r600.cl +++ b/clang/test/CodeGenOpenCL/builtins-r600.cl @@ -28,3 +28,87 @@ void test_div_scale_f32(global float* out, global int* flagout, float a, float b *out = __builtin_amdgpu_div_scalef(a, b, true, &flag); *flagout = flag; } + +// CHECK-LABEL: @test_div_fmas_f32 +// CHECK: call float @llvm.AMDGPU.div.fmas.f32 +void test_div_fmas_f32(global float* out, float a, float b, float c) +{ + *out = __builtin_amdgpu_div_fmasf(a, b, c); +} + +// CHECK-LABEL: @test_div_fmas_f64 +// CHECK: call double @llvm.AMDGPU.div.fmas.f64 +void test_div_fmas_f64(global double* out, double a, double b, double c) +{ + *out = __builtin_amdgpu_div_fmas(a, b, c); +} + +// CHECK-LABEL: @test_div_fixup_f32 +// CHECK: call float @llvm.AMDGPU.div.fixup.f32 +void test_div_fixup_f32(global float* out, float a, float b, float c) +{ + *out = __builtin_amdgpu_div_fixupf(a, b, c); +} + +// CHECK-LABEL: @test_div_fixup_f64 +// CHECK: call double @llvm.AMDGPU.div.fixup.f64 +void test_div_fixup_f64(global double* out, double a, double b, double c) +{ + *out = __builtin_amdgpu_div_fixup(a, b, c); +} + +// CHECK-LABEL: @test_trig_preop_f32 +// CHECK: call float @llvm.AMDGPU.trig.preop.f32 +void test_trig_preop_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgpu_trig_preopf(a, b); +} + +// CHECK-LABEL: @test_trig_preop_f64 +// CHECK: call double @llvm.AMDGPU.trig.preop.f64 +void test_trig_preop_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgpu_trig_preop(a, b); +} + +// CHECK-LABEL: @test_rcp_f32 +// CHECK: call float @llvm.AMDGPU.rcp.f32 +void test_rcp_f32(global float* out, float a) +{ + *out = __builtin_amdgpu_rcpf(a); +} + +// CHECK-LABEL: @test_rcp_f64 +// CHECK: call double @llvm.AMDGPU.rcp.f64 +void test_rcp_f64(global double* out, double a) +{ + *out = __builtin_amdgpu_rcp(a); +} + +// CHECK-LABEL: @test_rsq_f32 +// CHECK: call float @llvm.AMDGPU.rsq.f32 +void test_rsq_f32(global float* out, float a) +{ + *out = 
__builtin_amdgpu_rsqf(a); +} + +// CHECK-LABEL: @test_rsq_f64 +// CHECK: call double @llvm.AMDGPU.rsq.f64 +void test_rsq_f64(global double* out, double a) +{ + *out = __builtin_amdgpu_rsq(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f32 +// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32 +void test_rsq_clamped_f32(global float* out, float a) +{ + *out = __builtin_amdgpu_rsq_clampedf(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f64 +// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64 +void test_rsq_clamped_f64(global double* out, double a) +{ + *out = __builtin_amdgpu_rsq_clamped(a); +}