From d983f5f39ee5f198a622dbf9324c3bca1f0c1989 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby@arm.com>
Date: Mon, 3 Oct 2022 11:01:50 +0000
Subject: [PATCH] [flang] Add cpowi function to runtime and use instead of
 pgmath

This patch adds the cpowi family of functions (cpowi, zpowi, cpowk and
zpowk) to the flang runtime, and switches to using those functions instead
of pgmath for complex number to integer power operations.

Differential Revision: https://reviews.llvm.org/D134889
---
 flang/include/flang/Runtime/entry-names.h          |   6 +
 flang/lib/Lower/IntrinsicCall.cpp                  |  17 +++
 .../Optimizer/Transforms/SimplifyIntrinsics.cpp    |   4 -
 flang/runtime/CMakeLists.txt                       |   1 +
 flang/runtime/complex-powi.cpp                     | 125 +++++++++++++++
 flang/test/Lower/power-operator.f90                |   8 +-
 flang/unittests/Runtime/CMakeLists.txt             |   1 +
 flang/unittests/Runtime/Complex.cpp                | 170 +++++++++++++++++++++
 8 files changed, 324 insertions(+), 8 deletions(-)
 create mode 100644 flang/runtime/complex-powi.cpp
 create mode 100644 flang/unittests/Runtime/Complex.cpp
diff --git a/flang/include/flang/Runtime/entry-names.h b/flang/include/flang/Runtime/entry-names.h
index c2d68f0..abccb08 100644
--- a/flang/include/flang/Runtime/entry-names.h
+++ b/flang/include/flang/Runtime/entry-names.h
@@ -21,3 +21,9 @@
   prefix##revision##name
 #define RTNAME(name) NAME_WITH_PREFIX_AND_REVISION(_Fortran, A, name)
 #endif
+
+#ifndef RTNAME_STRING
+#define RTNAME_STRINGIFY_(x) #x
+#define RTNAME_STRINGIFY(x) RTNAME_STRINGIFY_(x)
+#define RTNAME_STRING(name) RTNAME_STRINGIFY(RTNAME(name))
+#endif
diff --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp
index fe6abb4..eeebc1c 100644
--- a/flang/lib/Lower/IntrinsicCall.cpp
+++ b/flang/lib/Lower/IntrinsicCall.cpp
@@ -34,6 +34,7 @@
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
+#include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "llvm/Support/CommandLine.h"
@@ -1215,6 +1216,14 @@ static mlir::FunctionType genF64ComplexFuncType(mlir::MLIRContext *context) {
   return mlir::FunctionType::get(context, {ctype}, {ftype});
 }
 
+template <int Kind, int Bits>
+static mlir::FunctionType
+genComplexComplexIntFuncType(mlir::MLIRContext *context) {
+  auto ctype = fir::ComplexType::get(context, Kind);
+  auto itype = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {ctype, itype}, {ctype});
+}
+
 /// Callback type for generating lowering for a math operation.
 using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location,
                                         llvm::StringRef, mlir::FunctionType,
@@ -1410,6 +1419,14 @@ static constexpr MathOperation mathOperations[] = {
     // TODO: add PowIOp in math and complex dialects.
     {"pow", "llvm.powi.f32.i32", genF32F32IntFuncType<32>, genLibCall},
     {"pow", "llvm.powi.f64.i32", genF64F64IntFuncType<32>, genLibCall},
+    {"pow", RTNAME_STRING(cpowi), genComplexComplexIntFuncType<4, 32>,
+     genLibCall},
+    {"pow", RTNAME_STRING(zpowi), genComplexComplexIntFuncType<8, 32>,
+     genLibCall},
+    {"pow", RTNAME_STRING(cpowk), genComplexComplexIntFuncType<4, 64>,
+     genLibCall},
+    {"pow", RTNAME_STRING(zpowk), genComplexComplexIntFuncType<8, 64>,
+     genLibCall},
     {"sign", "copysignf", genF32F32F32FuncType,
      genMathOp<mlir::math::CopySignOp>},
     {"sign", "copysign", genF64F64F64FuncType,
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 5682fa2..cdc3ab9 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -46,10 +46,6 @@ namespace fir {
 #include "flang/Optimizer/Transforms/Passes.h.inc"
 } // namespace fir
 
-#define RTNAME_STRINGIFY2(x) #x
-#define RTNAME_STRINGIFY(x) RTNAME_STRINGIFY2(x)
-#define RTNAME_STRING(x) RTNAME_STRINGIFY(RTNAME(x))
-
 #define DEBUG_TYPE "flang-simplify-intrinsics"
 
 namespace {
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 4d5cea4..2399c92 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -76,6 +76,7 @@ add_flang_library(FortranRuntime
   assign.cpp
   buffer.cpp
   command.cpp
+  complex-powi.cpp
   complex-reduction.c
   copy.cpp
   character.cpp
diff --git a/flang/runtime/complex-powi.cpp b/flang/runtime/complex-powi.cpp
new file mode 100644
index 0000000..18723bb
--- /dev/null
+++ b/flang/runtime/complex-powi.cpp
@@ -0,0 +1,125 @@
+/*===-- flang/runtime/complex-powi.cpp ----------------------------*- C++ -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+#include "flang/Runtime/entry-names.h"
+#include <cstdint>
+#include <cstdio>
+#include <limits>
+
+#ifdef __clang_major__
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+// Computes base**exp for an integer exponent by repeated squaring.
+template <typename C, typename I> C tgpowi(C base, I exp) {
+  if (exp == 0) {
+    return C{1};
+  }
+
+  bool invertResult{exp < 0};
+  bool isMin{exp == std::numeric_limits<I>::min()};
+  // -min() would overflow, so use max() and multiply by base once more below.
+  if (isMin) {
+    exp = std::numeric_limits<I>::max();
+  }
+
+  if (exp < 0) {
+    exp = exp * -1;
+  }
+
+  C origBase{base};
+  // Strip trailing zero bits of exp, squaring base for each (exp != 0 here).
+  while ((exp & 1) == 0) {
+    base *= base;
+    exp >>= 1;
+  }
+
+  C acc{base};
+  // Remaining bits: square base each step, multiply into acc when bit is set.
+  while (exp > 1) {
+    exp >>= 1;
+    base *= base;
+    if ((exp & 1) == 1) {
+      acc *= base;
+    }
+  }
+  // max() == -(min() + 1): apply the one factor of base still owed.
+  if (isMin) {
+    acc *= origBase;
+  }
+  // A negative exponent means the reciprocal of the positive power.
+  if (invertResult) {
+    acc = C{1} / acc;
+  }
+
+  return acc;
+}
+
+#ifndef _MSC_VER
+// With most compilers, C complex is implemented as a builtin type that may have
+// specific ABI requirements
+extern "C" float _Complex RTNAME(cpowi)(float _Complex base, std::int32_t exp) {
+  return tgpowi(base, exp);
+}
+
+extern "C" double _Complex RTNAME(zpowi)(
+    double _Complex base, std::int32_t exp) {
+  return tgpowi(base, exp);
+}
+
+extern "C" float _Complex RTNAME(cpowk)(float _Complex base, std::int64_t exp) {
+  return tgpowi(base, exp);
+}
+
+extern "C" double _Complex RTNAME(zpowk)(
+    double _Complex base, std::int64_t exp) {
+  return tgpowi(base, exp);
+}
+#else
+// On MSVC, C complex is always just a struct of two members, as it is not
+// supported as a builtin type. So we use C++ complex here, as that has the
+// same ABI and layout. See:
+// https://learn.microsoft.com/en-us/cpp/c-runtime-library/complex-math-support
+#include <complex>
+
+// MSVC doesn't allow including <ccomplex> or <complex.h> in C++17 mode to get
+// the Windows definitions of these structs so just redefine here.
+struct Fcomplex {
+  float re;
+  float im;
+};
+
+struct Dcomplex {
+  double re;
+  double im;
+};
+
+extern "C" Fcomplex RTNAME(cpowi)(Fcomplex base, std::int32_t exp) {
+  auto cppbase = *(std::complex<float> *)(&base);
+  auto cppres = tgpowi(cppbase, exp);
+  return *(Fcomplex *)(&cppres);
+}
+
+extern "C" Dcomplex RTNAME(zpowi)(Dcomplex base, std::int32_t exp) {
+  auto cppbase = *(std::complex<double> *)(&base);
+  auto cppres = tgpowi(cppbase, exp);
+  return *(Dcomplex *)(&cppres);
+}
+
+extern "C" Fcomplex RTNAME(cpowk)(Fcomplex base, std::int64_t exp) {
+  auto cppbase = *(std::complex<float> *)(&base);
+  auto cppres = tgpowi(cppbase, exp);
+  return *(Fcomplex *)(&cppres);
+}
+
+extern "C" Dcomplex RTNAME(zpowk)(Dcomplex base, std::int64_t exp) {
+  // Exponent is 64-bit (INTEGER(8)), matching the non-MSVC zpowk signature.
+  auto cppres = tgpowi(*(std::complex<double> *)(&base), exp);
+  return *(Dcomplex *)(&cppres);
+}
+
+#endif
diff --git a/flang/test/Lower/power-operator.f90 b/flang/test/Lower/power-operator.f90
index 5ad949c..a92f2b3 100644
--- a/flang/test/Lower/power-operator.f90
+++ b/flang/test/Lower/power-operator.f90
@@ -90,7 +90,7 @@ subroutine pow_c4_i4(x, y, z)
   complex :: x, z
   integer :: y
   z = x ** y
-  ! CHECK: call @__fc_powi_1
+  ! CHECK: call @_FortranAcpowi
 end subroutine
 
 ! CHECK-LABEL: pow_c4_i8
@@ -98,7 +98,7 @@ subroutine pow_c4_i8(x, y, z)
   complex :: x, z
   integer(8) :: y
   z = x ** y
-  ! CHECK: call @__fc_powk_1
+  ! CHECK: call @_FortranAcpowk
 end subroutine
 
 ! CHECK-LABEL: pow_c8_i4
@@ -106,7 +106,7 @@ subroutine pow_c8_i4(x, y, z)
   complex(8) :: x, z
   integer :: y
   z = x ** y
-  ! CHECK: call @__fz_powi_1
+  ! CHECK: call @_FortranAzpowi
 end subroutine
 
 ! CHECK-LABEL: pow_c8_i8
@@ -114,7 +114,7 @@ subroutine pow_c8_i8(x, y, z)
   complex(8) :: x, z
   integer(8) :: y
   z = x ** y
-  ! CHECK: call @__fz_powk_1
+  ! CHECK: call @_FortranAzpowk
 end subroutine
 
 ! CHECK-LABEL: pow_c4_c4
diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt
index 09de2ae..aa4eecc 100644
--- a/flang/unittests/Runtime/CMakeLists.txt
+++ b/flang/unittests/Runtime/CMakeLists.txt
@@ -2,6 +2,7 @@ add_flang_unittest(FlangRuntimeTests
   BufferTest.cpp
   CharacterTest.cpp
   CommandTest.cpp
+  Complex.cpp
   CrashHandlerFixture.cpp
   ExternalIOTest.cpp
   Format.cpp
diff --git a/flang/unittests/Runtime/Complex.cpp b/flang/unittests/Runtime/Complex.cpp
new file mode 100644
index 0000000..46f3ad2
--- /dev/null
+++ b/flang/unittests/Runtime/Complex.cpp
@@ -0,0 +1,170 @@
+//===-- flang/unittests/Runtime/Complex.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "gmock/gmock.h"
+#include "gtest/gtest-matchers.h"
+#include <limits>
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/cpp-type.h"
+#include "flang/Runtime/entry-names.h"
+
+#include <complex>
+#include <cstdint>
+
+#ifndef _MSC_VER
+#include <complex.h>
+typedef float _Complex float_Complex_t;
+typedef double _Complex double_Complex_t;
+#else
+struct float_Complex_t {
+  float re;
+  float im;
+};
+struct double_Complex_t {
+  double re;
+  double im;
+};
+#endif
+
+extern "C" float_Complex_t RTNAME(cpowi)(
+    float_Complex_t base, std::int32_t exp);
+
+extern "C" double_Complex_t RTNAME(zpowi)(
+    double_Complex_t base, std::int32_t exp);
+
+extern "C" float_Complex_t RTNAME(cpowk)(
+    float_Complex_t base, std::int64_t exp);
+
+extern "C" double_Complex_t RTNAME(zpowk)(
+    double_Complex_t base, std::int64_t exp);
+
+static std::complex<float> cpowi(std::complex<float> base, std::int32_t exp) {
+  float_Complex_t cbase{*(float_Complex_t *)(&base)};
+  float_Complex_t cres{RTNAME(cpowi)(cbase, exp)};
+  return *(std::complex<float> *)(&cres);
+}
+
+static std::complex<double> zpowi(std::complex<double> base, std::int32_t exp) {
+  double_Complex_t cbase{*(double_Complex_t *)(&base)};
+  double_Complex_t cres{RTNAME(zpowi)(cbase, exp)};
+  return *(std::complex<double> *)(&cres);
+}
+
+static std::complex<float> cpowk(std::complex<float> base, std::int64_t exp) {
+  float_Complex_t cbase{*(float_Complex_t *)(&base)};
+  float_Complex_t cres{RTNAME(cpowk)(cbase, exp)};
+  return *(std::complex<float> *)(&cres);
+}
+
+static std::complex<double> zpowk(std::complex<double> base, std::int64_t exp) {
+  double_Complex_t cbase{*(double_Complex_t *)(&base)};
+  double_Complex_t cres{RTNAME(zpowk)(cbase, exp)};
+  return *(std::complex<double> *)(&cres);
+}
+
+MATCHER_P(ExpectComplexFloatEq, c, "") {
+  using namespace testing;
+  return ExplainMatchResult(
+      AllOf(Property(&std::complex<float>::real, FloatEq(c.real())),
+          Property(&std::complex<float>::imag, FloatEq(c.imag()))),
+      arg, result_listener);
+}
+
+MATCHER_P(ExpectComplexDoubleEq, c, "") {
+  using namespace testing;
+  return ExplainMatchResult(AllOf(Property(&std::complex<double>::real,
+                                      DoubleNear(c.real(), 0.00000001)),
+                                Property(&std::complex<double>::imag,
+                                    DoubleNear(c.imag(), 0.00000001))),
+      arg, result_listener);
+}
+
+#define EXPECT_COMPLEX_FLOAT_EQ(val1, val2) \
+  EXPECT_THAT(val1, ExpectComplexFloatEq(val2))
+
+#define EXPECT_COMPLEX_DOUBLE_EQ(val1, val2) \
+  EXPECT_THAT(val1, ExpectComplexDoubleEq(val2))
+
+using namespace std::literals::complex_literals;
+
+TEST(Complex, cpowi) {
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, 0), 1.f + 0if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, 1), 3.f + 4if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, 2), -7.f + 24if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, 3), -117.f + 44if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, 4), -527.f - 336if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(3.f + 4if, -2), -0.0112f - 0.0384if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(2.f + 1if, 10), -237.f - 3116if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(0.5f + 0.6if, -10), -9.322937f - 7.2984829if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(2.f + 1if, 5), -38.f + 41if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowi(0.5f + 0.6if, -5), -1.121837f + 3.252915if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(
+      cpowi(0.f + 1if, std::numeric_limits<std::int32_t>::min()), 1.f + 0if);
+}
+
+TEST(Complex, cpowk) {
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, 0), 1.f + 0if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, 1), 3.f + 4if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, 2), -7.f + 24if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, 3), -117.f + 44if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, 4), -527.f - 336if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(3.f + 4if, -2), -0.0112f - 0.0384if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(2.f + 1if, 10), -237.f - 3116if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(0.5f + 0.6if, -10), -9.322937f - 7.2984829if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(2.f + 1if, 5), -38.f + 41if);
+  EXPECT_COMPLEX_FLOAT_EQ(cpowk(0.5f + 0.6if, -5), -1.121837f + 3.252915if);
+
+  EXPECT_COMPLEX_FLOAT_EQ(
+      cpowk(0.f + 1if, std::numeric_limits<std::int64_t>::min()), 1.f + 0if);
+}
+
+TEST(Complex, zpowi) {
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, 0), 1. + 0i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, 1), 3. + 4i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, 2), -7. + 24i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, 3), -117. + 44i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, 4), -527. - 336i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(3. + 4i, -2), -0.0112 - 0.0384i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(2. + 1i, 10), -237. - 3116i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(0.5 + 0.6i, -10), -9.32293628 - 7.29848564i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(2. + 1i, 5), -38. + 41i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowi(0.5 + 0.6i, -5), -1.12183773 + 3.25291503i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(
+      zpowi(0. + 1i, std::numeric_limits<std::int32_t>::min()), 1. + 0i);
+}
+
+TEST(Complex, zpowk) {
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, 0), 1. + 0i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, 1), 3. + 4i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, 2), -7. + 24i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, 3), -117. + 44i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, 4), -527. - 336i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(3. + 4i, -2), -0.0112 - 0.0384i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(2. + 1i, 10), -237. - 3116i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(0.5 + 0.6i, -10), -9.32293628 - 7.29848564i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(2. + 1i, 5l), -38. + 41i);
+  EXPECT_COMPLEX_DOUBLE_EQ(zpowk(0.5 + 0.6i, -5), -1.12183773 + 3.25291503i);
+
+  EXPECT_COMPLEX_DOUBLE_EQ(
+      zpowk(0. + 1i, std::numeric_limits<std::int64_t>::min()), 1. + 0i);
+}
-- 
2.7.4