From 3e0c512ce6cec6d7b83b87c49697adf5a67e4196 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Tue, 28 Dec 2021 23:55:32 -0600
Subject: [PATCH] [OpenMP] Simplify all stores in the device code

Similar to loads, we want to be aggressive when it comes to store
simplification. Not everything in LLVM handles dead stores well when
address space casts are involved, but we can simply ask the Attributor
to do it for us.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D109998
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp              |   2 +
 .../Transforms/OpenMP/custom_state_machines.ll     | 144 ---------------------
 llvm/test/Transforms/OpenMP/spmdization.ll         |  50 -------
 .../spmdization_guarding_two_reaching_kernels.ll   |   3 -
 4 files changed, 2 insertions(+), 197 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 4c578d5..464d749 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4509,6 +4509,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
         bool UsedAssumedInformation = false;
         A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
                                UsedAssumedInformation);
+      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+        A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
       }
     }
   }
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index b9eda9e..e4cee1c 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -1001,9 +1001,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1027,9 +1024,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1159,9 +1153,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1287,9 +1278,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1313,9 +1301,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1417,9 +1402,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1443,9 +1425,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1547,9 +1526,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1573,9 +1549,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1771,9 +1744,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1797,9 +1767,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -1845,9 +1812,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -2012,9 +1976,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2038,9 +1999,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2169,9 +2127,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2296,9 +2251,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2322,9 +2274,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2425,9 +2374,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2451,9 +2397,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2554,9 +2497,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2580,9 +2520,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2776,9 +2713,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2802,9 +2736,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2850,9 +2781,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2977,9 +2905,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3003,9 +2928,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3088,9 +3010,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3173,9 +3092,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3199,9 +3115,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3262,9 +3175,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3288,9 +3198,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3351,9 +3258,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3377,9 +3281,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3507,9 +3408,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3533,9 +3431,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3581,9 +3476,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3708,9 +3600,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -3734,9 +3623,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -3819,9 +3705,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -3904,9 +3787,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -3930,9 +3810,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -3993,9 +3870,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4019,9 +3893,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4082,9 +3953,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4108,9 +3976,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4238,9 +4103,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4264,9 +4126,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4312,9 +4171,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 22bbe97..5051bce 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -250,7 +250,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
 ; AMDGPU-DISABLED-NEXT: ret void
 ; AMDGPU-DISABLED: user_code.entry:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -304,7 +303,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
 ; NVPTX-DISABLED-NEXT: ret void
 ; NVPTX-DISABLED: user_code.entry:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -467,8 +465,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -479,8 +475,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -491,8 +485,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -503,8 +495,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -605,7 +595,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; AMDGPU-DISABLED-NEXT: ret void
 ; AMDGPU-DISABLED: user_code.entry:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -659,7 +648,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; NVPTX-DISABLED-NEXT: ret void
 ; NVPTX-DISABLED: user_code.entry:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -837,8 +825,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -849,8 +835,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -861,8 +845,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -873,8 +855,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
@@ -976,7 +956,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; AMDGPU-DISABLED-NEXT: ret void
 ; AMDGPU-DISABLED: user_code.entry:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -1030,7 +1009,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; NVPTX-DISABLED-NEXT: ret void
 ; NVPTX-DISABLED: user_code.entry:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -1221,8 +1199,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1236,8 +1212,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1251,8 +1225,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1266,8 +1238,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1374,7 +1344,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; AMDGPU-DISABLED-NEXT: ret void
 ; AMDGPU-DISABLED: user_code.entry:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -1428,7 +1397,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; NVPTX-DISABLED-NEXT: ret void
 ; NVPTX-DISABLED: user_code.entry:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
@@ -1654,8 +1622,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1669,8 +1635,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1684,8 +1648,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1699,8 +1661,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
@@ -1860,7 +1820,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-DISABLED-NEXT: ret void
 ; AMDGPU-DISABLED: user_code.entry:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
@@ -1907,7 +1866,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-DISABLED-NEXT: ret void
 ; NVPTX-DISABLED: user_code.entry:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
@@ -2325,8 +2283,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT: ret void
@@ -2337,8 +2293,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT: ret void
@@ -2349,8 +2303,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -2361,8 +2313,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index cb01af9..6b1983d 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -193,9 +193,6 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
 ; CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
 ; CHECK-NEXT: ret void
-- 
2.7.4
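
Editor's note (appended below the patch): every test change above follows one pattern. A device wrapper stores its arguments into local allocas whose contents are never read again (the outlined callee's corresponding parameters are marked `readnone`, and the actual payload travels through `__kmpc_get_shared_variables`), so once the new `AAIsDead` registration asks the Attributor about these stores, it proves them dead and deletes them. Below is a condensed, hypothetical reduction of the wrapper functions in `spmdization.ll`; `@wrapper` and `@outlined` are made-up names for illustration, not the actual test IR:

```llvm
; The callee never reads %a or %b, so interprocedurally the two stores in
; @wrapper have no reader and are dead.
define internal void @outlined(i32* %a, i32* %b) {
  ret void                                ; arguments are never read
}

declare void @__kmpc_get_shared_variables(i8***)

define internal void @wrapper(i16 zeroext %0, i32 %1) {
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4     ; dead: pointee never loaded
  store i32 0, i32* %.zero.addr, align 4  ; dead: pointee never loaded
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @outlined(i32* %.addr1, i32* %.zero.addr)
  ret void
}
```

Removing these stores through the Attributor rather than relying on a dedicated dead-store pass matters because, as the commit message notes, some of these accesses on the GPU targets happen through address space casts, which the rest of LLVM's dead-store handling copes with poorly; the deleted FileCheck `store` lines in the tests are exactly this effect.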