[OpenMP][FIX] Do not use TBAA in type punning reduction GPU code PR46156

author Johannes Doerfert <johannes@jdoerfert.de>

Sat, 15 Aug 2020 22:27:14 +0000 (17:27 -0500)

committer Johannes Doerfert <johannes@jdoerfert.de>

Sun, 16 Aug 2020 19:38:31 +0000 (14:38 -0500)
author Johannes Doerfert <johannes@jdoerfert.de>
Sat, 15 Aug 2020 22:27:14 +0000 (17:27 -0500)
committer Johannes Doerfert <johannes@jdoerfert.de>
Sun, 16 Aug 2020 19:38:31 +0000 (14:38 -0500)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

index 1b2608e..d9ef6c2 100644 (file)
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2845,8 +2845,12 @@ static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
    Address CastItem = CGF.CreateMemTemp(CastTy);
    Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
-  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy);
-  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc);
+  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,
+                        LValueBaseInfo(AlignmentSource::Type),
+                        TBAAAccessInfo());
+  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc,
+                              LValueBaseInfo(AlignmentSource::Type),
+                              TBAAAccessInfo());
  }
  
  /// This function creates calls to one of two shuffle functions to copy
@@ -2933,9 +2937,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
                         ThenBB, ExitBB);
        CGF.EmitBlock(ThenBB);
        llvm::Value *Res = createRuntimeShuffleFunction(
-          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+          CGF,
+          CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc,
+                               LValueBaseInfo(AlignmentSource::Type),
+                               TBAAAccessInfo()),
            IntType, Offset, Loc);
-      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType,
+                            LValueBaseInfo(AlignmentSource::Type),
+                            TBAAAccessInfo());
        Address LocalPtr = Bld.CreateConstGEP(Ptr, 1);
        Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
        PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
@@ -2944,9 +2953,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
        CGF.EmitBlock(ExitBB);
      } else {
        llvm::Value *Res = createRuntimeShuffleFunction(
-          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+          CGF,
+          CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc,
+                               LValueBaseInfo(AlignmentSource::Type),
+                               TBAAAccessInfo()),
            IntType, Offset, Loc);
-      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType,
+                            LValueBaseInfo(AlignmentSource::Type),
+                            TBAAAccessInfo());
        Ptr = Bld.CreateConstGEP(Ptr, 1);
        ElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
      }
@@ -3100,12 +3114,14 @@ static void emitReductionListCopy(
      } else {
        switch (CGF.getEvaluationKind(Private->getType())) {
        case TEK_Scalar: {
-        llvm::Value *Elem =
-            CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
-                                 Private->getType(), Private->getExprLoc());
+        llvm::Value *Elem = CGF.EmitLoadOfScalar(
+            SrcElementAddr, /*Volatile=*/false, Private->getType(),
+            Private->getExprLoc(), LValueBaseInfo(AlignmentSource::Type),
+            TBAAAccessInfo());
          // Store the source element value to the dest element address.
-        CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
-                              Private->getType());
+        CGF.EmitStoreOfScalar(
+            Elem, DestElementAddr, /*Volatile=*/false, Private->getType(),
+            LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
          break;
        }
        case TEK_Complex: {
@@ -3250,8 +3266,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
    Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
    Address LocalReduceList(
        Bld.CreatePointerBitCastOrAddrSpaceCast(
-          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, Loc),
+          CGF.EmitLoadOfScalar(
+              AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc,
+              LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()),
            CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
        CGF.getPointerAlign());
  
@@ -3329,10 +3346,13 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
  
        // elem = *elemptr
        //*MediumPtr = elem
-      llvm::Value *Elem =
-          CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
+      llvm::Value *Elem = CGF.EmitLoadOfScalar(
+          ElemPtr, /*Volatile=*/false, CType, Loc,
+          LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
        // Store the source element value to the dest element address.
-      CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
+      CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType,
+                            LValueBaseInfo(AlignmentSource::Type),
+                            TBAAAccessInfo());
  
        Bld.CreateBr(MergeBB);
  
@@ -3712,8 +3732,9 @@ static llvm::Value *emitListToGlobalCopyFunction(
      GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
      switch (CGF.getEvaluationKind(Private->getType())) {
      case TEK_Scalar: {
-      llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
-                                            Private->getType(), Loc);
+      llvm::Value *V = CGF.EmitLoadOfScalar(
+          ElemPtr, /*Volatile=*/false, Private->getType(), Loc,
+          LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
        CGF.EmitStoreOfScalar(V, GlobLVal);
        break;
      }
@@ -3916,7 +3937,9 @@ static llvm::Value *emitGlobalToListCopyFunction(
      switch (CGF.getEvaluationKind(Private->getType())) {
      case TEK_Scalar: {
        llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc);
-      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType());
+      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType(),
+                            LValueBaseInfo(AlignmentSource::Type),
+                            TBAAAccessInfo());
        break;
      }
      case TEK_Complex: {
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp

new file mode 100644 (file)

index 0000000..8f814de
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+#include <complex>
+
+// Verify we do not add tbaa metadata to type punned memory operations:
+
+// CHECK:      call i64 @__kmpc_shuffle_int64(
+// CHECK-NEXT: store i64 %{{.*}}, i64* %{{.*}}, align {{[0-9]+$}}
+
+// CHECK:      call i64 @__kmpc_shuffle_int64(
+// CHECK-NEXT: store i64 %{{.*}}, i64* %{{.*}}, align {{[0-9]+$}}
+
+template <typename T>
+void complex_reduction() {
+#pragma omp target teams distribute
+  for (int ib = 0; ib < 100; ib++) {
+    std::complex<T> partial_sum;
+    const int istart = ib * 4;
+    const int iend = (ib + 1) * 4;
+#pragma omp parallel for reduction(+ \
+                                   : partial_sum)
+    for (int i = istart; i < iend; i++)
+      partial_sum += std::complex<T>(i, i);
+  }
+}
+
+void test() {
+  complex_reduction<float>();
+  complex_reduction<double>();
+}
+#endif
author	Johannes Doerfert <johannes@jdoerfert.de>
	Sat, 15 Aug 2020 22:27:14 +0000 (17:27 -0500)
committer	Johannes Doerfert <johannes@jdoerfert.de>
	Sun, 16 Aug 2020 19:38:31 +0000 (14:38 -0500)
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp		patch \| blob \| history
clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp	[new file with mode: 0644]	patch \| blob