From f8e0e5db48601cb0d019405703ccaa2378f503e0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Aug 2020 10:27:46 +0100
Subject: [PATCH] [X86] Enable constexpr on _cast fp <-> uint intrinsics
 (PR31446)

As suggested by @rsmith on PR47267, by replacing the __builtin_memcpy bitcast pattern with __builtin_bit_cast we can use _castf32_u32, _castu32_f32, _castf64_u64 and _castu64_f64 inside constant expressions (constexpr). Although __builtin_bit_cast was added for C++20, it works in all clang C/C++ modes.

Differential Revision: https://reviews.llvm.org/D86398
---
 clang/docs/ReleaseNotes.rst       |  3 +++
 clang/lib/Headers/ia32intrin.h    | 19 +++++----------
 clang/test/CodeGen/x86-builtins.c | 50 +++++++++++++++++++--------------------
 3 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ba2b540..b4e594a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -179,6 +179,9 @@ X86 Support in Clang
 - The x86 intrinsics ``__bswap``, ``__bswapd``, ``__bswap64`` and ``__bswapq``
   may now be used within constant expressions.
 
+- The x86 intrinsics ``_castf32_u32``, ``_castf64_u64``, ``_castu32_f32`` and
+  ``_castu64_f64`` may now be used within constant expressions.
+
 Internal API Changes
 --------------------
 
diff --git a/clang/lib/Headers/ia32intrin.h b/clang/lib/Headers/ia32intrin.h
index 1d17a56..f01a3ad 100644
--- a/clang/lib/Headers/ia32intrin.h
+++ b/clang/lib/Headers/ia32intrin.h
@@ -16,12 +16,13 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
-#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__))
 #define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
 #else
+#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__))
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
 #endif
 
@@ -218,9 +219,7 @@ __writeeflags(unsigned int __f)
  */
 static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST
 _castf32_u32(float __A) {
-  unsigned int D;
-  __builtin_memcpy(&D, &__A, sizeof(__A));
-  return D;
+  return __builtin_bit_cast(unsigned int, __A);
 }
 
 /** Cast a 64-bit float value to a 64-bit unsigned integer value
@@ -235,9 +234,7 @@ _castf32_u32(float __A) {
  */
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST
 _castf64_u64(double __A) {
-  unsigned long long D;
-  __builtin_memcpy(&D, &__A, sizeof(__A));
-  return D;
+  return __builtin_bit_cast(unsigned long long, __A);
 }
 
 /** Cast a 32-bit unsigned integer value to a 32-bit float value
@@ -252,9 +249,7 @@ _castf64_u64(double __A) {
  */
 static __inline__ float __DEFAULT_FN_ATTRS_CAST
 _castu32_f32(unsigned int __A) {
-  float D;
-  __builtin_memcpy(&D, &__A, sizeof(__A));
-  return D;
+  return __builtin_bit_cast(float, __A);
 }
 
 /** Cast a 64-bit unsigned integer value to a 64-bit float value
@@ -269,9 +264,7 @@ _castu32_f32(unsigned int __A) {
  */
 static __inline__ double __DEFAULT_FN_ATTRS_CAST
 _castu64_f64(unsigned long long __A) {
-  double D;
-  __builtin_memcpy(&D, &__A, sizeof(__A));
-  return D;
+  return __builtin_bit_cast(double, __A);
 }
 
 /** Adds the unsigned integer operand to the CRC-32C checksum of the
diff --git a/clang/test/CodeGen/x86-builtins.c b/clang/test/CodeGen/x86-builtins.c
index fa2530b..6604e1f 100644
--- a/clang/test/CodeGen/x86-builtins.c
+++ b/clang/test/CodeGen/x86-builtins.c
@@ -1,45 +1,43 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK-64
-// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK-32
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <x86intrin.h>
 
 unsigned int test_castf32_u32 (float __A){
-  // CHECK-64-LABEL: @test_castf32_u32
-  // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 4, i1 false)
-  // CHECK-64: %{{.*}} = load i32, i32* %{{.*}}, align 4
-  // CHECK-32-LABEL: @test_castf32_u32
-  // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 4, i1 false)
-  // CHECK-32: %{{.*}} = load i32, i32* %{{.*}}, align 4
+  // CHECK-LABEL: test_castf32_u32
+  // CHECK: bitcast float* %{{.*}} to i32*
+  // CHECK: %{{.*}} = load i32, i32* %{{.*}}, align 4
   return _castf32_u32(__A);
 }
 
 unsigned long long test_castf64_u64 (double __A){
-  // CHECK-64-LABEL: @test_castf64_u64
-  // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 8, i1 false)
-  // CHECK-64: %{{.*}} = load i64, i64* %{{.*}}, align 8
-  // CHECK-32-LABEL: @test_castf64_u64
-  // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i32 8, i1 false)
-  // CHECK-32: %{{.*}} = load i64, i64* %{{.*}}, align 8
+  // CHECK-LABEL: test_castf64_u64
+  // CHECK: bitcast double* %{{.*}} to i64*
+  // CHECK: %{{.*}} = load i64, i64* %{{.*}}, align 8
   return _castf64_u64(__A);
 }
 
 float test_castu32_f32 (unsigned int __A){
-  // CHECK-64-LABEL: @test_castu32_f32
-  // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 4, i1 false)
-  // CHECK-64: %{{.*}} = load float, float* %{{.*}}, align 4
-  // CHECK-32-LABEL: @test_castu32_f32
-  // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 4, i1 false)
-  // CHECK-32: %{{.*}} = load float, float* %{{.*}}, align 4
+  // CHECK-LABEL: test_castu32_f32
+  // CHECK: bitcast i32* %{{.*}} to float*
+  // CHECK: %{{.*}} = load float, float* %{{.*}}, align 4
   return _castu32_f32(__A);
 }
 
 double test_castu64_f64 (unsigned long long __A){
-  // CHECK-64-LABEL: @test_castu64_f64
-  // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 8, i1 false)
-  // CHECK-64: %{{.*}} = load double, double* %{{.*}}, align 8
-  // CHECK-32-LABEL: @test_castu64_f64
-  // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i32 8, i1 false)
-  // CHECK-32: %{{.*}} = load double, double* %{{.*}}, align 8
+  // CHECK-LABEL: test_castu64_f64
+  // CHECK: bitcast i64* %{{.*}} to double*
+  // CHECK: %{{.*}} = load double, double* %{{.*}}, align 8
   return _castu64_f64(__A);
 }
 
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+char cast_f32_u32_0[_castf32_u32(-0.0f) == 0x80000000 ? 1 : -1];
+char cast_u32_f32_0[_castu32_f32(0x3F800000) == +1.0f ? 1 : -1];
+
+char castf64_u64_0[_castf64_u64(-0.0) == 0x8000000000000000 ? 1 : -1];
+char castu64_f64_0[_castu64_f64(0xBFF0000000000000ULL) == -1.0 ? 1 : -1];
+#endif
-- 
2.7.4