From 8a21ea1d0a3883a8d0aa15440388f91e49da4d08 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sun, 30 Apr 2023 10:19:57 -0400
Subject: [PATCH] clang: Start emitting intrinsic for __builtin_ldexp*

Also introduce __builtin_ldexpf16.
---
 clang/include/clang/Basic/Builtins.def             |  1 +
 clang/lib/CodeGen/CGBuiltin.cpp                    | 31 +++++++++++++++++++++-
 clang/test/CodeGen/aix-builtin-mapping.c           |  2 +-
 clang/test/CodeGen/constrained-math-builtins.c     | 15 ++++++++++-
 clang/test/CodeGen/math-builtins.c                 |  8 +++---
 clang/test/CodeGenOpenCL/builtins-f16.cl           |  5 +++-
 .../test/CodeGenOpenCL/builtins-generic-amdgcn.cl  | 20 ++++++++++++++
 7 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index c8f955a..4f18c48 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -159,6 +159,7 @@ BUILTIN(__builtin_ldexp , "ddi"  , "Fne")
 BUILTIN(__builtin_ldexpf, "ffi"  , "Fne")
 BUILTIN(__builtin_ldexpl, "LdLdi", "Fne")
 BUILTIN(__builtin_ldexpf128, "LLdLLdi", "Fne")
+BUILTIN(__builtin_ldexpf16, "hhi", "Fne")
 BUILTIN(__builtin_modf , "ddd*"  , "Fn")
 BUILTIN(__builtin_modff, "fff*"  , "Fn")
 BUILTIN(__builtin_modfl, "LdLdLd*", "Fn")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ddf28dc..d958198 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -540,6 +540,25 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   }
 }
 
+// Has second type mangled argument.
+static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
+    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
+    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
+  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+  if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
+    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
+                                       {Src0->getType(), Src1->getType()});
+    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
+  }
+
+  Function *F =
+      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
+  return CGF.Builder.CreateCall(F, {Src0, Src1});
+}
+
 // Emit an intrinsic that has 3 operands of the same type as its result.
 // Depending on mode, this may be a constrained floating-point intrinsic.
 static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
@@ -2567,7 +2586,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
           *this, E, Intrinsic::llrint,
           Intrinsic::experimental_constrained_llrint));
-
+    case Builtin::BI__builtin_ldexp:
+    case Builtin::BI__builtin_ldexpf:
+    case Builtin::BI__builtin_ldexpl:
+    case Builtin::BI__builtin_ldexpf16:
+    case Builtin::BI__builtin_ldexpf128: {
+      return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
+          *this, E, Intrinsic::ldexp,
+          Intrinsic::experimental_constrained_ldexp));
+    }
     default:
       break;
     }
@@ -3035,6 +3062,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
 
     if (Builder.getIsFPConstrained()) {
+      // FIXME: llvm.powi has 2 mangling types,
+      // llvm.experimental.constrained.powi has one.
       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
                                      Src0->getType());
diff --git a/clang/test/CodeGen/aix-builtin-mapping.c b/clang/test/CodeGen/aix-builtin-mapping.c
index 57ea558..98fcfd4 100644
--- a/clang/test/CodeGen/aix-builtin-mapping.c
+++ b/clang/test/CodeGen/aix-builtin-mapping.c
@@ -19,4 +19,4 @@ int main()
 
 // CHECK: %call = call double @modf(double noundef 1.000000e+00, ptr noundef %DummyLongDouble) #3
 // CHECK: %call1 = call double @frexp(double noundef 0.000000e+00, ptr noundef %DummyInt) #3
-// CHECK: %call2 = call double @ldexp(double noundef 1.000000e+00, i32 noundef {{(signext )?}}1) #4
+// CHECK: %{{.+}} = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 1)
diff --git a/clang/test/CodeGen/constrained-math-builtins.c b/clang/test/CodeGen/constrained-math-builtins.c
index cccfd8b..88ae8f5 100644
--- a/clang/test/CodeGen/constrained-math-builtins.c
+++ b/clang/test/CodeGen/constrained-math-builtins.c
@@ -7,7 +7,7 @@
 
 #pragma float_control(except, on)
 
-void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
+void foo(double *d, float f, float *fp, long double *l, int *i, const char *c, _Float16 h) {
   f = __builtin_fmod(f,f);    f = __builtin_fmodf(f,f);   f =  __builtin_fmodl(f,f); f = __builtin_fmodf128(f,f);
 
 // CHECK: call double @llvm.experimental.constrained.frem.f64(double %{{.*}}, double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
@@ -28,6 +28,14 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 // CHECK: call float @llvm.experimental.constrained.powi.f32(float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
 // CHECK: call x86_fp80 @llvm.experimental.constrained.powi.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
 
+
+  h = __builtin_ldexpf16(h, *i);  *d = __builtin_ldexp(*d, *i);        f = __builtin_ldexpf(f, *i);       __builtin_ldexpl(*l, *i);
+
+// CHECK: call half @llvm.experimental.constrained.ldexp.f16.i32(half %{{.*}}, i32 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+// CHECK: call double @llvm.experimental.constrained.ldexp.f64.i32(double %{{.*}}, i32 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+// CHECK: call float @llvm.experimental.constrained.ldexp.f32.i32(float %{{.*}}, i32 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+// CHECK: call x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80 %{{.*}}, i32 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+
   __builtin_ceil(f);       __builtin_ceilf(f);      __builtin_ceill(f); __builtin_ceilf128(f);
 
 // CHECK: call double @llvm.experimental.constrained.ceil.f64(double %{{.*}}, metadata !"fpexcept.strict")
@@ -190,6 +198,11 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 // CHECK: declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 // CHECK: declare x86_fp80 @llvm.experimental.constrained.powi.f80(x86_fp80, i32, metadata, metadata)
 
+// CHECK: declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata)
+// CHECK: declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata)
+// CHECK: declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata)
+// CHECK: declare x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80, i32, metadata, metadata)
+
 // CHECK: declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
 // CHECK: declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
 // CHECK: declare x86_fp80 @llvm.experimental.constrained.ceil.f80(x86_fp80, metadata)
diff --git a/clang/test/CodeGen/math-builtins.c b/clang/test/CodeGen/math-builtins.c
index 04738cc..6233501 100644
--- a/clang/test/CodeGen/math-builtins.c
+++ b/clang/test/CodeGen/math-builtins.c
@@ -77,10 +77,10 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   __builtin_ldexp(f,f);    __builtin_ldexpf(f,f);   __builtin_ldexpl(f,f);  __builtin_ldexpf128(f,f);
 
-// NO__ERRNO: declare double @ldexp(double noundef, i32 noundef) [[READNONE]]
-// NO__ERRNO: declare float @ldexpf(float noundef, i32 noundef) [[READNONE]]
-// NO__ERRNO: declare x86_fp80 @ldexpl(x86_fp80 noundef, i32 noundef) [[READNONE]]
-// NO__ERRNO: declare fp128 @ldexpf128(fp128 noundef, i32 noundef) [[READNONE]]
+// NO__ERRNO: declare double @llvm.ldexp.f64.i32(double, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.ldexp.f32.i32(float, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare fp128 @llvm.ldexp.f128.i32(fp128, i32) [[READNONE_INTRINSIC]]
 // HAS_ERRNO: declare double @ldexp(double noundef, i32 noundef) [[NOT_READNONE]]
 // HAS_ERRNO: declare float @ldexpf(float noundef, i32 noundef) [[NOT_READNONE]]
 // HAS_ERRNO: declare x86_fp80 @ldexpl(x86_fp80 noundef, i32 noundef) [[NOT_READNONE]]
diff --git a/clang/test/CodeGenOpenCL/builtins-f16.cl b/clang/test/CodeGenOpenCL/builtins-f16.cl
index 48412dd..caf2074 100644
--- a/clang/test/CodeGenOpenCL/builtins-f16.cl
+++ b/clang/test/CodeGenOpenCL/builtins-f16.cl
@@ -3,7 +3,7 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // CHECK-LABEL: define{{.*}} void @test_half_builtins
-void test_half_builtins(half h0, half h1, half h2) {
+void test_half_builtins(half h0, half h1, half h2, int i0) {
   volatile half res;
 
   // CHECK: call half @llvm.copysign.f16(half %h0, half %h1)
@@ -68,4 +68,7 @@ void test_half_builtins(half h0, half h1, half h2) {
 
   // CHECK: call half @llvm.canonicalize.f16(half %h0)
   res = __builtin_canonicalizef16(h0);
+
+  // CHECK: call half @llvm.ldexp.f16.i32(half %h0, i32 %i0)
+  res = __builtin_ldexpf16(h0, i0);
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl
index 46c044a0..aa8f133 100644
--- a/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl
@@ -1,6 +1,8 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -Wno-error=int-conversion -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
 
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 // CHECK-LABEL: @test_builtin_clz(
 // CHECK: tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 void test_builtin_clz(global int* out, int a)
@@ -19,3 +21,21 @@ void test_builtin_clzl(global long* out, long a)
 void test_builtin_frame_address(int *out) {
     *out = __builtin_frame_address(0);
 }
+
+// CHECK-LABEL: @test_builtin_ldexpf16(
+// CHECK: tail call half @llvm.ldexp.f16.i32(half %v, i32 %e)
+half test_builtin_ldexpf16(half v, int e) {
+  return __builtin_ldexpf16(v, e);
+}
+
+// CHECK-LABEL: @test_builtin_ldexpf(
+// CHECK: tail call float @llvm.ldexp.f32.i32(float %v, i32 %e)
+float test_builtin_ldexpf(float v, int e) {
+  return __builtin_ldexpf(v, e);
+}
+
+// CHECK-LABEL: @test_builtin_ldexp(
+// CHECK: tail call double @llvm.ldexp.f64.i32(double %v, i32 %e)
+double test_builtin_ldexp(double v, int e) {
+  return __builtin_ldexp(v, e);
+}
-- 
2.7.4