From 437346abe18ec4fc982ae36f6821487dafc1a06e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 7 Dec 2022 22:48:27 -0500
Subject: [PATCH] clang: Add __builtin_elementwise canonicalize and copysign

These are copy-pasted from the other elementwise builtins. I also need fma,
but the current code seems to assume 1 or 2 arguments, so it is not added here.
---
 clang/docs/LanguageExtensions.rst              |  7 ++-
 clang/include/clang/Basic/Builtins.def         |  2 +
 clang/lib/CodeGen/CGBuiltin.cpp                |  6 +-
 clang/lib/Sema/SemaChecking.cpp                |  4 +-
 clang/test/CodeGen/builtins-elementwise-math.c | 52 ++++++++++++++++++
 clang/test/Sema/builtins-elementwise-math.c    | 76 ++++++++++++++++++++++++++
 6 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index ad6e10c..9b39dc2 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -639,8 +639,11 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
                                              rounding halfway cases to even (that is, to the nearest value
                                              that is an even integer), regardless of the current rounding
                                              direction.
- T__builtin_elementwise_trunc(T x)           return the integral value nearest to but no larger in            floating point types
+ T __builtin_elementwise_trunc(T x)          return the integral value nearest to but no larger in            floating point types
                                              magnitude than x
+ T __builtin_elementwise_canonicalize(T x)   return the platform-specific canonical encoding                  floating point types
+                                             of a floating-point number
+ T __builtin_elementwise_copysign(T x, T y)  return the magnitude of x with the sign of y.                    floating point types
  T __builtin_elementwise_max(T x, T y)       return x or y, whichever is larger                               integer and floating point types
  T __builtin_elementwise_min(T x, T y)       return x or y, whichever is smaller                              integer and floating point types
  T __builtin_elementwise_add_sat(T x, T y)   return the sum of x and y, clamped to the range of               integer types
@@ -4203,7 +4206,7 @@ these values is same as for `constrained floating point intrinsics <http://llvm.
     #pragma clang fp exceptions(strict)
     z = x + y;
     if (fetestexcept(FE_OVERFLOW))
-	  ...
+      ...
   }
 
 A ``#pragma clang fp`` pragma may contain any number of options:
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 2279d80..5008a4e 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -665,6 +665,8 @@ BUILTIN(__builtin_elementwise_floor, "v.", "nct")
 BUILTIN(__builtin_elementwise_roundeven, "v.", "nct")
 BUILTIN(__builtin_elementwise_sin, "v.", "nct")
 BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
+BUILTIN(__builtin_elementwise_canonicalize, "v.", "nct")
+BUILTIN(__builtin_elementwise_copysign, "v.", "nct")
 BUILTIN(__builtin_elementwise_add_sat, "v.", "nct")
 BUILTIN(__builtin_elementwise_sub_sat, "v.", "nct")
 BUILTIN(__builtin_reduce_max, "v.", "nct")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a1149bf..ef0802e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3091,7 +3091,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_trunc:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
-
+  case Builtin::BI__builtin_elementwise_canonicalize:
+    return RValue::get(emitUnaryBuiltin(
+        *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
+  case Builtin::BI__builtin_elementwise_copysign:
+    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 68ae989..d009d55 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2577,7 +2577,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_floor:
   case Builtin::BI__builtin_elementwise_roundeven:
   case Builtin::BI__builtin_elementwise_sin:
-  case Builtin::BI__builtin_elementwise_trunc: {
+  case Builtin::BI__builtin_elementwise_trunc:
+  case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
       return ExprError();
 
@@ -2620,6 +2621,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
 
   case Builtin::BI__builtin_elementwise_min:
   case Builtin::BI__builtin_elementwise_max:
+  case Builtin::BI__builtin_elementwise_copysign:
     if (SemaBuiltinElementwiseMath(TheCall))
       return ExprError();
     break;
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 5834816..489b6dc 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -3,6 +3,8 @@
 typedef float float4 __attribute__((ext_vector_type(4)));
 typedef short int si8 __attribute__((ext_vector_type(8)));
 typedef unsigned int u4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef double double3 __attribute__((ext_vector_type(3)));
 
 __attribute__((address_space(1))) int int_as_one;
 typedef int bar;
@@ -412,3 +414,53 @@ void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
   vf2 = __builtin_elementwise_trunc(vf1);
 }
+
+void test_builtin_elementwise_canonicalize(float f1, float f2, double d1, double d2,
+                                           float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_canonicalize(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.canonicalize.f32(float [[F1]])
+  f2 = __builtin_elementwise_canonicalize(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.canonicalize.f64(double [[D1]])
+  d2 = __builtin_elementwise_canonicalize(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_canonicalize(vf1);
+}
+
+void test_builtin_elementwise_copysign(float f1, float f2, double d1, double d2,
+                                       float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_copysign(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
+  // CHECK-NEXT: call float @llvm.copysign.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_copysign(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.copysign.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_copysign(d1, d2);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.copysign.f64(double [[D1]], double 2.000000e+00)
+  d1 = __builtin_elementwise_copysign(d1, 2.0);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_copysign(vf1, vf2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_copysign(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_copysign(vf2, cvf1);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index c041a1f..500dc3a 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -384,3 +384,79 @@ void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv,
   uv = __builtin_elementwise_trunc(uv);
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
+
+void test_builtin_elementwise_canonicalize(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_canonicalize(f);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_canonicalize();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_canonicalize(i);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_canonicalize(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_canonicalize(u);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_canonicalize(uv);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
+void test_builtin_elementwise_copysign(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+  i = __builtin_elementwise_copysign(p, d);
+  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
+
+  struct Foo foo = __builtin_elementwise_copysign(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_copysign(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_copysign();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_copysign(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_copysign(v, iv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+  i = __builtin_elementwise_copysign(uv, iv);
+  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
+
+  s = __builtin_elementwise_copysign(i, s);
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_copysign(one, two);
+
+  enum f { three };
+  enum f x = __builtin_elementwise_copysign(one, three);
+
+  _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
+  ext = __builtin_elementwise_copysign(ext, ext);
+
+  const int ci;
+  i = __builtin_elementwise_copysign(ci, i);
+  i = __builtin_elementwise_copysign(i, ci);
+  i = __builtin_elementwise_copysign(ci, ci);
+
+  i = __builtin_elementwise_copysign(i, int_as_one); // ok (attributes don't match)?
+  i = __builtin_elementwise_copysign(i, b);          // ok (sugar doesn't match)?
+
+  int A[10];
+  A = __builtin_elementwise_copysign(A, A);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
+
+  int(ii);
+  int j;
+  j = __builtin_elementwise_copysign(i, j);
+
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_copysign(c1, c2);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
-- 
2.7.4