[OpenCL] Handle -cl-fp32-correctly-rounded-divide-sqrt

author Yaxun Liu <Yaxun.Liu@amd.com>

Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)

committer Yaxun Liu <Yaxun.Liu@amd.com>

Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)
author Yaxun Liu <Yaxun.Liu@amd.com>
Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td

index f73fba9..6983c57 100644 (file)
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -389,6 +389,8 @@ def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]
    HelpText<"OpenCL language standard to compile for.">;
  def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>, Flags<[CC1Option]>,
    HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
+def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
+  HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">;
  def client__name : JoinedOrSeparate<["-"], "client_name">;
  def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>;
  def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
diff --git a/clang/include/clang/Frontend/CodeGenOptions.def b/clang/include/clang/Frontend/CodeGenOptions.def

index 8e8ca38..51b0cf6 100644 (file)
--- a/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/clang/include/clang/Frontend/CodeGenOptions.def
@@ -111,6 +111,7 @@ CODEGENOPT(NoInline          , 1, 0) ///< Set when -fno-inline is enabled.
                                       ///< Disables use of the inline keyword.
  CODEGENOPT(NoNaNsFPMath      , 1, 0) ///< Assume FP arguments, results not NaN.
  CODEGENOPT(FlushDenorm       , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
+CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
  CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
  /// \brief Method of Objective-C dispatch to use.
  ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp

index 242b596..fdd83ea 100644 (file)
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1734,6 +1734,9 @@ void CodeGenModule::ConstructAttributeList(
                             llvm::utostr(CodeGenOpts.SSPBufferSize));
      FuncAttrs.addAttribute("no-signed-zeros-fp-math",
                             llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+    FuncAttrs.addAttribute(
+        "correctly-rounded-divide-sqrt-fp-math",
+        llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
  
      if (CodeGenOpts.StackRealignment)
        FuncAttrs.addAttribute("stackrealign");
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp

index 343d3fb..f393687 100644 (file)
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2276,8 +2276,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
  
    if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
      llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
-    if (CGF.getLangOpts().OpenCL) {
-      // OpenCL 1.1 7.4: minimum accuracy of single precision / is 2.5ulp
+    if (CGF.getLangOpts().OpenCL &&
+        !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) {
+      // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
+      // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
+      // build option allows an application to specify that single precision
+      // floating-point divide (x/y and 1/x) and sqrt used in the program
+      // source are correctly rounded.
        llvm::Type *ValTy = Val->getType();
        if (ValTy->isFloatTy() ||
            (isa<llvm::VectorType>(ValTy) &&
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp

index a9d940f..141d12e 100644 (file)
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -5282,6 +5282,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
    if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) {
      CmdArgs.push_back("-cl-denorms-are-zero");
    }
+  if (Args.getLastArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt)) {
+    CmdArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt");
+  }
  
    // Forward -f options with positive and negative forms; we translate
    // these by hand.
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp

index bbb7f2b..e19b612 100644 (file)
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -572,6 +572,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
    Opts.NoSignedZeros = (Args.hasArg(OPT_fno_signed_zeros) ||
                          Args.hasArg(OPT_cl_no_signed_zeros));
    Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
+  Opts.CorrectlyRoundedDivSqrt =
+      Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
    Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
    Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
    Opts.BackendOptions = Args.getAllArgValues(OPT_backend_option);
diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl

index 88df3bf..780c95f 100644 (file)
--- a/clang/test/CodeGenOpenCL/fpmath.cl
+++ b/clang/test/CodeGenOpenCL/fpmath.cl
@@ -1,16 +1,23 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
  
  typedef __attribute__(( ext_vector_type(4) )) float float4;
  
  float spscalardiv(float a, float b) {
    // CHECK: @spscalardiv
-  // CHECK: fdiv{{.*}}, !fpmath ![[MD:[0-9]+]]
+  // CHECK: #[[ATTR:[0-9]+]]
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD:[0-9]+]]
+  // DIVOPT-NOT: !fpmath ![[MD:[0-9]+]]
    return a / b;
  }
  
  float4 spvectordiv(float4 a, float4 b) {
    // CHECK: @spvectordiv
-  // CHECK: fdiv{{.*}}, !fpmath ![[MD]]
+  // CHECK: #[[ATTR]]
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD]]
+  // DIVOPT-NOT: !fpmath ![[MD]]
    return a / b;
  }
  
@@ -18,8 +25,13 @@ float4 spvectordiv(float4 a, float4 b) {
  
  double dpscalardiv(double a, double b) {
    // CHECK: @dpscalardiv
+  // CHECK: #[[ATTR]]
    // CHECK-NOT: !fpmath
    return a / b;
  }
  
-// CHECK: ![[MD]] = !{float 2.500000e+00}
+// CHECK: attributes #[[ATTR]] = {
+// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK: }
+// NODIVOPT: ![[MD]] = !{float 2.500000e+00}
diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl

index b2656c2..d68d424 100644 (file)
--- a/clang/test/Driver/opencl.cl
+++ b/clang/test/Driver/opencl.cl
@@ -12,6 +12,7 @@
  // RUN: %clang -S -### -cl-mad-enable %s 2>&1 | FileCheck --check-prefix=CHECK-MAD-ENABLE %s
  // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
  // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
+// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
  // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
  // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
  
@@ -29,6 +30,7 @@
  // CHECK-MAD-ENABLE: "-cc1" {{.*}} "-cl-mad-enable"
  // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"
  // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero"
+// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
  // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
  // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'
author	Yaxun Liu <Yaxun.Liu@amd.com>
	Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)
committer	Yaxun Liu <Yaxun.Liu@amd.com>
	Tue, 9 Aug 2016 20:10:18 +0000 (20:10 +0000)
clang/include/clang/Driver/Options.td		patch | blob | history
clang/include/clang/Frontend/CodeGenOptions.def		patch | blob | history
clang/lib/CodeGen/CGCall.cpp		patch | blob | history
clang/lib/CodeGen/CGExprScalar.cpp		patch | blob | history
clang/lib/Driver/Tools.cpp		patch | blob | history
clang/lib/Frontend/CompilerInvocation.cpp		patch | blob | history
clang/test/CodeGenOpenCL/fpmath.cl		patch | blob | history
clang/test/Driver/opencl.cl		patch | blob | history