/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
-
- /// Maximal size of the shared memory buffer.
- SharedMemorySize = 128,
};
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, true));
- IsInTargetMasterThreadRegion = IsSPMD;
+ Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
emitGenericVarsEpilog(CGF);
CGBuilderTy &Bld = CGF.Builder;
- OMPBuilder.createTargetDeinit(Bld, IsSPMD, true);
+ OMPBuilder.createTargetDeinit(Bld, IsSPMD);
}
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
(~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
} // anonymous namespace
-unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
- switch (getExecutionMode()) {
- case EM_SPMD:
- return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
- case EM_NonSPMD:
- return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
- case EM_Unknown:
- return UndefinedMode;
- }
- llvm_unreachable("Unknown flags are requested.");
-}
-
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM) {
llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
// Emit target region as a standalone region.
- class NVPTXPrePostActionTy : public PrePostActionTy {
- bool &IsInParallelRegion;
- bool PrevIsInParallelRegion;
-
- public:
- NVPTXPrePostActionTy(bool &IsInParallelRegion)
- : IsInParallelRegion(IsInParallelRegion) {}
- void Enter(CodeGenFunction &CGF) override {
- PrevIsInParallelRegion = IsInParallelRegion;
- IsInParallelRegion = true;
- }
- void Exit(CodeGenFunction &CGF) override {
- IsInParallelRegion = PrevIsInParallelRegion;
- }
- } Action(IsInParallelRegion);
- CodeGen.setAction(Action);
bool PrevIsInTTDRegion = IsInTTDRegion;
IsInTTDRegion = false;
- bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
- IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
- IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
- if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
- !IsInParallelRegion) {
+ if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {
llvm::Function *WrapperFun =
createParallelDataSharingWrapper(OutlinedFun, D);
WrapperFunctionsMap[OutlinedFun] = WrapperFun;
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
Data.insert(std::make_pair(VD, MappedVarData()));
}
- if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
- CheckVarsEscapingDeclContext VarChecker(CGF, std::nullopt);
- VarChecker.Visit(Body);
- I->getSecond().SecondaryLocalVarData.emplace();
- DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData;
- for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
- assert(VD->isCanonicalDecl() && "Expected canonical declaration");
- Data.insert(std::make_pair(VD, MappedVarData()));
- }
- }
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
/// Constant for NVPTX for better optimization.
bool isDefaultLocationConstant() const override { return true; }
- /// Returns additional flags that can be stored in reserved_2 field of the
- /// default location.
- /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
- /// Full/Lightweight runtime mode. Used for better optimization.
- unsigned getDefaultLocationReserved2Flags() const override;
-
public:
explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
void clear() override;
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
- /// true if we're emitting the code for the target region and next parallel
- /// region is L0 for sure.
- bool IsInTargetMasterThreadRegion = false;
/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
- /// true if we're definitely in the parallel region.
- bool IsInParallelRegion = false;
/// Map between an outlined function and its wrapper.
llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
- llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = std::nullopt;
EscapedParamsTy EscapedParameters;
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
EscapedVariableLengthDeclsAddrs;
- llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
};
/// Maps the function to the list of the globalized variables with their
/// reductions.
/// All the records are gathered into a union `union.type` is created.
llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
- /// Shared pointer for the global memory in the global memory buffer used for
- /// the given kernel.
- llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
/// Pair of the Non-SPMD team and all reductions variables in this team
/// region.
std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
int arr[N];
-// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true, i1 true)
+// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true)
#pragma omp target
for (int i = 0; i < N; i++) {
arr[i] = 1;
int arr[N];
-// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false)
#pragma omp target simd
for (int i = 0; i < N; i++) {
arr[i] = 1;
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]]
-// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR6]]
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]]
+// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
// CHECK1-NEXT: ret void
// CHECK1-SAME: () #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR6]]
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR7]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
// CHECK1-NEXT: ret i32 [[CALL]]
//
int a;
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
-// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
+// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
+// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
+// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
void foo() {
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams distribute parallel for simd if(a)
for (int i = 0; i < 10; ++i)
;
int a;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams distribute parallel for lastprivate(a)
#pragma omp target teams distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init(
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target parallel for if(a)
for (int i = 0; i < 10; ++i)
#pragma omp target parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
#pragma omp target parallel if(a)
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp parallel for
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: store i32 100, ptr [[B]], align 4
// CHECK-NEXT: store i32 1000, ptr [[C]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[B]], ptr [[TMP5]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1
-// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[B]], ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1
+// CHECK-NEXT: store ptr [[A]], ptr [[TMP4]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
-// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR4]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]]
// CHECK-NEXT: ret void
//
// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
-// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
-// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
-// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
-// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]]
+// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP18]] to ptr
+// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
+// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to ptr
-// CHECK4-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP27]], align 8
-// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8
-// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
-// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP31]], align 8
-// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
-// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP33]], align 8
-// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6
-// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
-// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
-// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7)
+// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 8
+// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8
+// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP27]], align 8
+// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
+// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP28]], align 8
+// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6
+// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP29]], align 8
+// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK4-NEXT: [[TMP31:%.*]] = zext i1 [[TOBOOL]] to i32
+// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
+// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7)
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]]
+// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]]
+// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
// CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]]
+// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
// CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]]
+// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]]
// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK4: cond.true12:
-// CHECK4-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: br label [[COND_END14:%.*]]
// CHECK4: cond.false13:
-// CHECK4-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: br label [[COND_END14]]
// CHECK4: cond.end14:
-// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ]
+// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE12]] ], [ [[TMP43]], [[COND_FALSE13]] ]
// CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP52]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
-// CHECK4-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4
-// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP54]])
-// CHECK4-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
-// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
+// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]])
+// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK4-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK4-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i64 40, i1 false)
// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false)
-// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64
-// CHECK4-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]]
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP14]] to i64
+// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP15]]
// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]]
// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]]
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4
-// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
+// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]]
// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]]
-// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[I6]], align 4
-// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64
+// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]]
// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]]
-// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[I6]], align 4
-// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64
+// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP19]] to i64
// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]]
// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
-// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
+// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
+// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false)
// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK5: user_code.entry:
-// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
-// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK5-NEXT: ret void
// CHECK5: worker.exit:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK5: cond.true:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: br label [[COND_END:%.*]]
// CHECK5: cond.false:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: br label [[COND_END]]
// CHECK5: cond.end:
-// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]]
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to ptr
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK5-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP17]] to ptr
+// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4
+// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP18]] to ptr
// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4
-// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to ptr
-// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4
-// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 4
-// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
-// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4
-// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
-// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP29]], align 4
-// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5
-// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP31]], align 4
-// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6
-// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4
-// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0
-// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4
-// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7)
+// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP23]], align 4
+// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4
+// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP25]], align 4
+// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5
+// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP26]], align 4
+// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6
+// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP27]], align 4
+// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0
+// CHECK5-NEXT: [[TMP29:%.*]] = zext i1 [[TOBOOL]] to i32
+// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7)
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]]
+// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]]
+// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]]
+// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]]
+// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK5: cond.true12:
-// CHECK5-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: br label [[COND_END14:%.*]]
// CHECK5: cond.false13:
-// CHECK5-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: br label [[COND_END14]]
// CHECK5: cond.end14:
-// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ]
+// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE12]] ], [ [[TMP41]], [[COND_FALSE13]] ]
// CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP50]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
-// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP52]])
-// CHECK5-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0
-// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
+// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
+// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
+// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i32 40, i1 false)
// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false)
-// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]]
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]]
// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4
-// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP19]]
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP17]]
// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4
-// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP20]]
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP18]]
// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]]
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4
-// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP21]]
+// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]]
// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]]
// CHECK5-NEXT: store i32 [[ADD16]], ptr [[TMP1]], align 4
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
+// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
+// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false)
// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP31]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP33]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8
-// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP36]], align 8
-// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP38]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP43]], align 8
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP34]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP40]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP53]], align 8
-// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP55]], align 8
-// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP56]], align 8
-// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP58]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP49]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP53]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
+// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP55]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8
+// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP56]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
+// CHECK1-NEXT: store ptr null, ptr [[TMP57]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
+// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP58]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9
+// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP59]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP61]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP63]], align 8
-// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP66]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP68]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP70]], align 8
-// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP71]], align 8
-// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP73]], align 8
-// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
-// CHECK1-NEXT: store ptr null, ptr [[TMP75]], align 8
-// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
-// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP76]], align 8
-// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9
-// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP78]], align 8
-// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
-// CHECK1-NEXT: store ptr null, ptr [[TMP80]], align 8
-// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10
-// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP81]], align 8
-// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10
-// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP83]], align 8
-// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10
-// CHECK1-NEXT: store ptr null, ptr [[TMP85]], align 8
-// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10
+// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP61]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10
+// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP62]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10
+// CHECK1-NEXT: store ptr null, ptr [[TMP63]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP88]], align 4
-// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 11, ptr [[TMP89]], align 4
-// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP90]], align 8
-// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP91]], align 8
-// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP92]], align 8
-// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP93]], align 8
-// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP94]], align 8
-// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP95]], align 8
-// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP96]], align 8
-// CHECK1-NEXT: [[TMP97:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
-// CHECK1-NEXT: br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP66]], align 4
+// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 11, ptr [[TMP67]], align 4
+// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP69]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP70]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP71]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP72]], align 8
+// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP73]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP74]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0
+// CHECK1-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP11]], ptr [[TMP12]], ptr [[TMP13]], ptr [[TMP14]], ptr [[A]], ptr [[TMP15]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
-// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[B]], align 8
-// CHECK1-NEXT: store ptr [[TMP99]], ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[C]], align 8
-// CHECK1-NEXT: store ptr [[TMP100]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT: [[TMP101:%.*]] = load ptr, ptr [[L]], align 8
-// CHECK1-NEXT: store ptr [[TMP101]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT: [[TMP102:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[_TMP5]], align 8
-// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[D]], align 8
-// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[_TMP6]], align 8
-// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8
-// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8
-// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP114:%.*]] = load ptr, ptr [[TMP113]], align 8
-// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3
-// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3
-// CHECK1-NEXT: [[TMP117:%.*]] = load ptr, ptr [[TMP116]], align 8
-// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4
-// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4
-// CHECK1-NEXT: [[TMP120:%.*]] = load ptr, ptr [[TMP119]], align 8
-// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP121]], align 8
-// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP123]], align 8
-// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[B]], align 8
+// CHECK1-NEXT: store ptr [[TMP77]], ptr [[_TMP4]], align 8
+// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[C]], align 8
+// CHECK1-NEXT: store ptr [[TMP78]], ptr [[_TMP5]], align 8
+// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[L]], align 8
+// CHECK1-NEXT: store ptr [[TMP79]], ptr [[_TMP6]], align 8
+// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[_TMP5]], align 8
+// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[D]], align 8
+// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP85]], align 8
+// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP89:%.*]] = load ptr, ptr [[TMP88]], align 8
+// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[TMP91]], align 8
+// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP94]], align 8
+// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4
+// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4
+// CHECK1-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8
+// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP99]], align 8
+// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP100]], align 8
+// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP101]], align 8
+// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP102]], align 8
+// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP103]], align 8
+// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP104]], align 8
+// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP105]], align 8
+// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP106]], align 8
+// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP107]], align 8
+// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP108]], align 8
+// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP109]], align 8
+// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3
+// CHECK1-NEXT: store ptr null, ptr [[TMP110]], align 8
+// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP111]], align 8
+// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP112]], align 8
+// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP113]], align 8
+// CHECK1-NEXT: [[TMP114:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP114]], align 8
+// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP115]], align 8
+// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5
+// CHECK1-NEXT: store ptr null, ptr [[TMP116]], align 8
+// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6
+// CHECK1-NEXT: store ptr [[TMP84]], ptr [[TMP117]], align 8
+// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6
+// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP118]], align 8
+// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP119]], align 8
+// CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7
+// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP120]], align 8
+// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7
+// CHECK1-NEXT: store ptr [[TMP89]], ptr [[TMP121]], align 8
+// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP122]], align 8
+// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8
+// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP123]], align 8
+// CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8
+// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP124]], align 8
+// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8
// CHECK1-NEXT: store ptr null, ptr [[TMP125]], align 8
-// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP126]], align 8
-// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP128]], align 8
-// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP130]], align 8
-// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP131]], align 8
-// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP133]], align 8
-// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP135]], align 8
-// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP136]], align 8
-// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP138]], align 8
-// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3
-// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8
-// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP141]], align 8
-// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP143]], align 8
-// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP145]], align 8
-// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5
-// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP146]], align 8
-// CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5
-// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP148]], align 8
-// CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5
-// CHECK1-NEXT: store ptr null, ptr [[TMP150]], align 8
-// CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6
-// CHECK1-NEXT: store ptr [[TMP106]], ptr [[TMP151]], align 8
-// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6
-// CHECK1-NEXT: store ptr [[TMP108]], ptr [[TMP153]], align 8
-// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP155]], align 8
-// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[TMP109]], ptr [[TMP156]], align 8
-// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[TMP111]], ptr [[TMP158]], align 8
-// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP160]], align 8
-// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[TMP112]], ptr [[TMP161]], align 8
-// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[TMP114]], ptr [[TMP163]], align 8
-// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8
-// CHECK1-NEXT: store ptr null, ptr [[TMP165]], align 8
-// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9
-// CHECK1-NEXT: store ptr [[TMP115]], ptr [[TMP166]], align 8
-// CHECK1-NEXT: [[TMP168:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9
-// CHECK1-NEXT: store ptr [[TMP117]], ptr [[TMP168]], align 8
-// CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9
-// CHECK1-NEXT: store ptr null, ptr [[TMP170]], align 8
-// CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10
-// CHECK1-NEXT: store ptr [[TMP118]], ptr [[TMP171]], align 8
-// CHECK1-NEXT: [[TMP173:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10
-// CHECK1-NEXT: store ptr [[TMP120]], ptr [[TMP173]], align 8
-// CHECK1-NEXT: [[TMP175:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10
-// CHECK1-NEXT: store ptr null, ptr [[TMP175]], align 8
-// CHECK1-NEXT: [[TMP176:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP177:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9
+// CHECK1-NEXT: store ptr [[TMP93]], ptr [[TMP126]], align 8
+// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9
+// CHECK1-NEXT: store ptr [[TMP95]], ptr [[TMP127]], align 8
+// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9
+// CHECK1-NEXT: store ptr null, ptr [[TMP128]], align 8
+// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10
+// CHECK1-NEXT: store ptr [[TMP96]], ptr [[TMP129]], align 8
+// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10
+// CHECK1-NEXT: store ptr [[TMP98]], ptr [[TMP130]], align 8
+// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10
+// CHECK1-NEXT: store ptr null, ptr [[TMP131]], align 8
+// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP178]], align 4
-// CHECK1-NEXT: [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
-// CHECK1-NEXT: store i32 11, ptr [[TMP179]], align 4
-// CHECK1-NEXT: [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP176]], ptr [[TMP180]], align 8
-// CHECK1-NEXT: [[TMP181:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP177]], ptr [[TMP181]], align 8
-// CHECK1-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP182]], align 8
-// CHECK1-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP183]], align 8
-// CHECK1-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP184]], align 8
-// CHECK1-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP185]], align 8
-// CHECK1-NEXT: [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP186]], align 8
-// CHECK1-NEXT: [[TMP187:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]])
-// CHECK1-NEXT: [[TMP188:%.*]] = icmp ne i32 [[TMP187]], 0
-// CHECK1-NEXT: br i1 [[TMP188]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP134]], align 4
+// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
+// CHECK1-NEXT: store i32 11, ptr [[TMP135]], align 4
+// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP132]], ptr [[TMP136]], align 8
+// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP133]], ptr [[TMP137]], align 8
+// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP138]], align 8
+// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP139]], align 8
+// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8
+// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP141]], align 8
+// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP142]], align 8
+// CHECK1-NEXT: [[TMP143:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]])
+// CHECK1-NEXT: [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0
+// CHECK1-NEXT: br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
// CHECK1: omp_offload.failed11:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP102]], ptr [[TMP103]], ptr [[TMP104]], ptr [[A]], ptr [[TMP105]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP80]], ptr [[TMP81]], ptr [[TMP82]], ptr [[A]], ptr [[TMP83]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT12]]
// CHECK1: omp_offload.cont12:
-// CHECK1-NEXT: [[TMP189:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK1-NEXT: [[TMP145:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN1S3fooEv(ptr noundef nonnull align 4 dereferenceable(4) @s)
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP189]], [[CALL]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP145]], [[CALL]]
// CHECK1-NEXT: ret i32 [[ADD]]
//
//
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP4]], i64 40, i1 false)
// CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B5]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B5]], align 4
// CHECK1-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C7]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C7]], align 4
// CHECK1-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP11]])
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP9]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false)
// CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4
// CHECK1-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4
// CHECK1-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP14]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP12]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP2]], i32 0, i32 0
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP23]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP17]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
+// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27(ptr [[THIS1]], ptr [[TMP2]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
-// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[L]], align 8
-// CHECK1-NEXT: store ptr [[TMP33]], ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP37]], align 8
-// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP39]], align 8
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP44]], align 8
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP36]], ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[L]], align 8
+// CHECK1-NEXT: store ptr [[TMP27]], ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP34]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP54]], align 4
-// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP55]], align 4
-// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8
-// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP58]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP59]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8
-// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK1-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0
-// CHECK1-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP42]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP43]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP41]], ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0
+// CHECK1-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
// CHECK1: omp_offload.failed7:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP34]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP28]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]]
// CHECK1: omp_offload.cont8:
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[A]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[L]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP66]])
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP65]], [[CALL]]
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[L]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP54]])
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP53]], [[CALL]]
// CHECK1-NEXT: ret i32 [[ADD]]
//
//
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false)
// CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false)
// CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 2, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
-// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18(ptr [[TMP1]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false)
// CHECK1-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP2]])
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false)
// CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7:[0-9]+]]
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false)
// CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]]
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8
// CHECK2-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false)
// CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4
// CHECK2-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4
// CHECK2-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8
-// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8
// CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8
-// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 3
+// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7]]
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
-// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
-// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false)
// CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK2-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4
// CHECK2-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4
// CHECK2-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8
+// CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8
// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 2
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8
-// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 4
-// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]]
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3
+// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4
+// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false)
// CHECK2-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8
// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false)
// CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK3-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4
// CHECK3-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4
// CHECK3-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8
// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8
-// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7:[0-9]+]]
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8
// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
-// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false)
// CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4
// CHECK3-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK3-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4
// CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8
+// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8
// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 2
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8
-// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 4
-// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8
-// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8
-// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]]
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4
+// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8
+// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false)
// CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7]]
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false)
// CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]]
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false)
// CHECK3-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
+// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]]
// CHECK3-NEXT: ret void
//
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK2: atomic_cont:
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
-// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16
-// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
+// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR3]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32
-// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1
-// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16
-// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
+// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK-NEXT: store ptr [[D]], ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 3
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4)
-// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8
// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2
-// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16
-// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
+// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
+// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP10]] to double
-// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00
-// CHECK1-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float
-// CHECK1-NEXT: store float [[CONV7]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3
-// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
-// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP11]] to double
-// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00
-// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float
-// CHECK1-NEXT: store float [[CONV11]], ptr [[ARRAYIDX8]], align 4
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
-// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP12]], 1.000000e+00
-// CHECK1-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double
+// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00
+// CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float
+// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3
+// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
+// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double
+// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00
+// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float
+// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8
+// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00
+// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP5]]
-// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX15]], i64 3
-// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX16]], align 8
-// CHECK1-NEXT: [[ADD17:%.*]] = fadd double [[TMP14]], 1.000000e+00
-// CHECK1-NEXT: store double [[ADD17]], ptr [[ARRAYIDX16]], align 8
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
+// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8
+// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00
+// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8
// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0
// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8
-// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP15]], 1
-// CHECK1-NEXT: store i64 [[ADD18]], ptr [[X]], align 8
+// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1
+// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8
// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1
// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8
-// CHECK1-NEXT: [[CONV19:%.*]] = sext i8 [[TMP16]] to i32
-// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1
-// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8
-// CHECK1-NEXT: store i8 [[CONV21]], ptr [[Y]], align 8
+// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1
+// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8
+// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8
// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]]
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
-// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP17]], 1
-// CHECK1-NEXT: store i64 [[ADD22]], ptr [[CALL]], align 8
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
+// CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
-// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16
-// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
-// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1
-// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8
-// CHECK1-NEXT: store i8 [[CONV8]], ptr [[AAA_ADDR]], align 1
+// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP5]] to double
-// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00
+// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
+// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8
-// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A4]], align 8
+// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8
// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
-// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8
-// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8
+// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP7]]
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
-// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
-// CHECK1-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A7]], align 8
-// CHECK1-NEXT: [[CONV8:%.*]] = fptosi double [[TMP8]] to i32
-// CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], ptr nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR10]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
+// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
+// CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8
+// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
+// CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4)
-// CHECK1-NEXT: ret i32 [[TMP7]]
+// CHECK1-NEXT: ret i32 [[TMP4]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK1-SAME: () #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1: 1:
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
-// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
-// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4
// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK2-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1
-// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32
-// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2
-// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16
-// CHECK2-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
+// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
+// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
+// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
// CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32
-// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
-// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
-// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
-// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32
-// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1
-// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8
-// CHECK2-NEXT: store i8 [[CONV7]], ptr [[AAA_ADDR]], align 1
+// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
// CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4)
-// CHECK2-NEXT: ret i32 [[TMP7]]
+// CHECK2-NEXT: ret i32 [[TMP4]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK2-SAME: () #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2: 1:
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32
-// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1
-// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16
-// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
+// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: ret void
//
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
//
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN]](
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
//
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN1]](
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
//
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN2]](
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-64-SAME: () #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: if.end:
-// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
//
//
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-32-SAME: () #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: if.end:
-// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
//
}
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
// CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait(
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void
#endif
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]]
// CHECK2-NEXT: ret void
//
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8
// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4
// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]]
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]]
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]]
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]]
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]]
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]]
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]]
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]]
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]]
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
// CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP31]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
}
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48(
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], ptr {{.+}},
-// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
-// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
+// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
+// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: store {{.+}} 99, ptr [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, ptr [[COMB_UB]],
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC3]], align 4
+// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC3]]) #[[ATTR3:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC3]], i64 4)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
-// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC2]], align 8
+// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC2]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC2]], i64 8)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
-// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
-// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8
-// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8
+// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]]
// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]])
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
-// CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]])
-// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
+// CHECK1-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK1-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
+// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
+// CHECK1-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
+// CHECK1-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
-// CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1
-// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0
-// CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]]
-// CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]]
-// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]]
-// CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
+// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
+// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
+// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK1: then4:
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8
-// CHECK1-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8
+// CHECK1-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8
// CHECK1-NEXT: br label [[IFCONT6:%.*]]
// CHECK1: else5:
// CHECK1-NEXT: br label [[IFCONT6]]
// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
// CHECK1-NEXT: br label [[PRECOND:%.*]]
// CHECK1: precond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2
-// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
+// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK1: body:
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK1: then2:
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4
-// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// CHECK1-NEXT: br label [[IFCONT4:%.*]]
// CHECK1: else3:
// CHECK1-NEXT: br label [[IFCONT4]]
// CHECK1: ifcont4:
-// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
// CHECK1-NEXT: br label [[PRECOND]]
// CHECK1: exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8
-// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128
-// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
+// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
-// CHECK1-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1)
-// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1
+// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1)
+// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
-// CHECK1-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK1-NEXT: store float [[TMP2]], ptr [[D3]], align 4
+// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+// CHECK1-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i64 4)
-// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
-// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
-// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
+// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1
-// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1
-// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32
+// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1
+// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32
+// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32
// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK1-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4
-// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4
+// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]]
// CHECK1-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1
-// CHECK1-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]])
-// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8
-// CHECK1-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i64 1
-// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK1-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
+// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
+// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
+// CHECK1-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i64 1
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
+// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
+// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK1-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1
-// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0
-// CHECK1-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]]
-// CHECK1-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK1-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]]
-// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]]
-// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]]
-// CHECK1-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]]
-// CHECK1-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 8
-// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1
-// CHECK1-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1
-// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4
-// CHECK1-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
+// CHECK1-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4
+// CHECK1-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4
// CHECK1-NEXT: br label [[IFCONT7:%.*]]
// CHECK1: else6:
// CHECK1-NEXT: br label [[IFCONT7]]
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK1-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+// CHECK1-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK1: then2:
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1
-// CHECK1-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
+// CHECK1-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
// CHECK1-NEXT: br label [[IFCONT4:%.*]]
// CHECK1: else3:
// CHECK1-NEXT: br label [[IFCONT4]]
// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK1: then6:
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
// CHECK1-NEXT: br label [[IFCONT8:%.*]]
// CHECK1: else7:
// CHECK1-NEXT: br label [[IFCONT8]]
// CHECK1: ifcont8:
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK1: then10:
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4
-// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
// CHECK1-NEXT: br label [[IFCONT12:%.*]]
// CHECK1: else11:
// CHECK1-NEXT: br label [[IFCONT12]]
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4
-// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
+// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128
-// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128
-// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
+// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
+// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
-// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1
-// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
+// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32
-// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
+// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2
-// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]])
+// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1: .omp.reduction.done:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
-// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1
-// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
+// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32
-// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32
+// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32
// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
// CHECK1: cond.true9:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: br label [[COND_END11:%.*]]
// CHECK1: cond.false10:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK1-NEXT: br label [[COND_END11]]
// CHECK1: cond.end11:
-// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ]
+// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ]
// CHECK1-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2
// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]])
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1
-// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8
-// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK1-NEXT: br label [[IFCONT7:%.*]]
// CHECK1: else6:
// CHECK1-NEXT: br label [[IFCONT7]]
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK1: then2:
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK1-NEXT: br label [[IFCONT4:%.*]]
// CHECK1: else3:
// CHECK1-NEXT: br label [[IFCONT4]]
// CHECK1: ifcont4:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK1: then6:
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK1-NEXT: br label [[IFCONT8:%.*]]
// CHECK1: else7:
// CHECK1-NEXT: br label [[IFCONT8]]
// CHECK1: ifcont8:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK1: then10:
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK1-NEXT: br label [[IFCONT12:%.*]]
// CHECK1: else11:
// CHECK1-NEXT: br label [[IFCONT12]]
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1
-// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8
-// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK1-NEXT: br label [[IFCONT7:%.*]]
// CHECK1: else6:
// CHECK1-NEXT: br label [[IFCONT7]]
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK1: then2:
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK1-NEXT: br label [[IFCONT4:%.*]]
// CHECK1: else3:
// CHECK1-NEXT: br label [[IFCONT4]]
// CHECK1: ifcont4:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK1: then6:
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK1-NEXT: br label [[IFCONT8:%.*]]
// CHECK1: else7:
// CHECK1-NEXT: br label [[IFCONT8]]
// CHECK1: ifcont8:
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK1: then10:
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK1-NEXT: br label [[IFCONT12:%.*]]
// CHECK1: else11:
// CHECK1-NEXT: br label [[IFCONT12]]
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2
-// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
+// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128
-// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
+// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
+// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
-// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
-// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+// CHECK2-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
-// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8
-// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8
+// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]]
// CHECK2-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]])
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
-// CHECK2-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]])
-// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK2-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK2-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
+// CHECK2-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK2-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
+// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
+// CHECK2-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
+// CHECK2-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK2-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
-// CHECK2-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK2-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1
-// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0
-// CHECK2-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]]
-// CHECK2-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]]
-// CHECK2-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]]
-// CHECK2-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK2-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
+// CHECK2-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
+// CHECK2-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
+// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK2: then4:
-// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4
-// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4
-// CHECK2-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8
-// CHECK2-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8
+// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8
+// CHECK2-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8
// CHECK2-NEXT: br label [[IFCONT6:%.*]]
// CHECK2: else5:
// CHECK2-NEXT: br label [[IFCONT6]]
// CHECK2-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
// CHECK2-NEXT: br label [[PRECOND:%.*]]
// CHECK2: precond:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2
-// CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
+// CHECK2-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK2: body:
// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]]
-// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK2: then2:
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]]
-// CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4
-// CHECK2-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
+// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// CHECK2-NEXT: br label [[IFCONT4:%.*]]
// CHECK2: else3:
// CHECK2-NEXT: br label [[IFCONT4]]
// CHECK2: ifcont4:
-// CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
// CHECK2-NEXT: br label [[PRECOND]]
// CHECK2: exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8
-// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128
-// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
+// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
-// CHECK2-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
-// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1
+// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
+// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
// CHECK2-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK2-NEXT: store float [[TMP2]], ptr [[D3]], align 4
+// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
+// CHECK2-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]]
-// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
+// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
-// CHECK2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
-// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
+// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1
-// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1
-// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32
+// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1
+// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32
+// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32
// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK2-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
-// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4
-// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4
+// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]]
// CHECK2-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1
-// CHECK2-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
-// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]])
-// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8
-// CHECK2-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK2-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1
-// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK2-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK2-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
+// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
+// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
+// CHECK2-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
+// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
+// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK2-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK2-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1
-// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0
-// CHECK2-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]]
-// CHECK2-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK2-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]]
-// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]]
-// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]]
-// CHECK2-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]]
-// CHECK2-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4
-// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4
-// CHECK2-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1
-// CHECK2-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1
-// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK2-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4
-// CHECK2-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
+// CHECK2-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4
+// CHECK2-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4
// CHECK2-NEXT: br label [[IFCONT7:%.*]]
// CHECK2: else6:
// CHECK2-NEXT: br label [[IFCONT7]]
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK2-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+// CHECK2-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK2: then2:
-// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1
-// CHECK2-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
+// CHECK2-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
// CHECK2-NEXT: br label [[IFCONT4:%.*]]
// CHECK2: else3:
// CHECK2-NEXT: br label [[IFCONT4]]
// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK2: then6:
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
// CHECK2-NEXT: br label [[IFCONT8:%.*]]
// CHECK2: else7:
// CHECK2-NEXT: br label [[IFCONT8]]
// CHECK2: ifcont8:
// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]]
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK2: then10:
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4
-// CHECK2-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4
-// CHECK2-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
// CHECK2-NEXT: br label [[IFCONT12:%.*]]
// CHECK2: else11:
// CHECK2-NEXT: br label [[IFCONT12]]
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4
-// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
+// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128
-// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128
-// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
+// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
+// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
-// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1
-// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
+// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32
-// CHECK2-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
+// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
-// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ]
+// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2
-// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]])
+// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK2: .omp.reduction.done:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
-// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1
-// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
+// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32
-// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32
+// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
// CHECK2: cond.true9:
-// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: br label [[COND_END11:%.*]]
// CHECK2: cond.false10:
-// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK2-NEXT: br label [[COND_END11]]
// CHECK2: cond.end11:
-// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ]
+// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ]
// CHECK2-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2
// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]])
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4
-// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4
-// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4
-// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK2-NEXT: br label [[IFCONT7:%.*]]
// CHECK2: else6:
// CHECK2-NEXT: br label [[IFCONT7]]
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK2: then2:
-// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK2-NEXT: br label [[IFCONT4:%.*]]
// CHECK2: else3:
// CHECK2-NEXT: br label [[IFCONT4]]
// CHECK2: ifcont4:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK2: then6:
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK2-NEXT: br label [[IFCONT8:%.*]]
// CHECK2: else7:
// CHECK2-NEXT: br label [[IFCONT8]]
// CHECK2: ifcont8:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK2: then10:
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK2-NEXT: br label [[IFCONT12:%.*]]
// CHECK2: else11:
// CHECK2-NEXT: br label [[IFCONT12]]
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4
-// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4
-// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4
-// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK2-NEXT: br label [[IFCONT7:%.*]]
// CHECK2: else6:
// CHECK2-NEXT: br label [[IFCONT7]]
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK2: then2:
-// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK2-NEXT: br label [[IFCONT4:%.*]]
// CHECK2: else3:
// CHECK2-NEXT: br label [[IFCONT4]]
// CHECK2: ifcont4:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK2: then6:
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK2-NEXT: br label [[IFCONT8:%.*]]
// CHECK2: else7:
// CHECK2-NEXT: br label [[IFCONT8]]
// CHECK2: ifcont8:
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK2: then10:
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK2-NEXT: br label [[IFCONT12:%.*]]
// CHECK2: else11:
// CHECK2-NEXT: br label [[IFCONT12]]
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2
-// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
+// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128
-// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
+// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
-// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
-// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
-// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8
-// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8
+// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8
+// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]]
// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]])
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8
-// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
-// CHECK3-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]])
-// CHECK3-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK3-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
+// CHECK3-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK3-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
+// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
+// CHECK3-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
+// CHECK3-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
-// CHECK3-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK3-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1
-// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0
-// CHECK3-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]]
-// CHECK3-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]]
-// CHECK3-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]]
-// CHECK3-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK3-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
+// CHECK3-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
+// CHECK3-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
+// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
+// CHECK3-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK3: then4:
-// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4
-// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4
-// CHECK3-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8
-// CHECK3-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8
+// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8
+// CHECK3-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8
// CHECK3-NEXT: br label [[IFCONT6:%.*]]
// CHECK3: else5:
// CHECK3-NEXT: br label [[IFCONT6]]
// CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
// CHECK3-NEXT: br label [[PRECOND:%.*]]
// CHECK3: precond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2
-// CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
+// CHECK3-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK3: body:
// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]]
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK3: then2:
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]]
-// CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4
-// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
+// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
+// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// CHECK3-NEXT: br label [[IFCONT4:%.*]]
// CHECK3: else3:
// CHECK3-NEXT: br label [[IFCONT4]]
// CHECK3: ifcont4:
-// CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
// CHECK3-NEXT: br label [[PRECOND]]
// CHECK3: exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8
-// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128
-// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
+// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
-// CHECK3-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
-// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1
+// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
+// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
// CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
-// CHECK3-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK3-NEXT: store float [[TMP2]], ptr [[D3]], align 4
+// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
+// CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]]
-// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4)
-// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1)
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
+// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
-// CHECK3-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
-// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8)
+// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1
-// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1
-// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32
+// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1
+// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32
+// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32
// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK3-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
-// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4
-// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4
+// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]]
// CHECK3-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1
-// CHECK3-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]])
-// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8
-// CHECK3-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK3-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1
-// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK3-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
+// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
+// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
+// CHECK3-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
+// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
+// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK3-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK3-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1
-// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0
-// CHECK3-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]]
-// CHECK3-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK3-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]]
-// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]]
-// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]]
-// CHECK3-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]]
-// CHECK3-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4
-// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4
-// CHECK3-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1
-// CHECK3-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1
-// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4
-// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK3-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4
-// CHECK3-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK3-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
+// CHECK3-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK3-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4
+// CHECK3-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4
// CHECK3-NEXT: br label [[IFCONT7:%.*]]
// CHECK3: else6:
// CHECK3-NEXT: br label [[IFCONT7]]
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK3-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+// CHECK3-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK3: then2:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1
-// CHECK3-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
+// CHECK3-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
// CHECK3-NEXT: br label [[IFCONT4:%.*]]
// CHECK3: else3:
// CHECK3-NEXT: br label [[IFCONT4]]
// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK3: then6:
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
// CHECK3-NEXT: br label [[IFCONT8:%.*]]
// CHECK3: else7:
// CHECK3-NEXT: br label [[IFCONT8]]
// CHECK3: ifcont8:
// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]]
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK3: then10:
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4
-// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
+// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
// CHECK3-NEXT: br label [[IFCONT12:%.*]]
// CHECK3: else11:
// CHECK3-NEXT: br label [[IFCONT12]]
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
-// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4
-// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
+// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128
-// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128
-// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
+// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
+// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
-// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1
-// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20)
+// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32
-// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
+// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ]
+// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2
-// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]])
+// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]])
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK3: .omp.reduction.done:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
-// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1
-// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13)
+// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2
-// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32
-// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2
-// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32
+// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32
+// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32
// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
// CHECK3: cond.true9:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: br label [[COND_END11:%.*]]
// CHECK3: cond.false10:
-// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2
// CHECK3-NEXT: br label [[COND_END11]]
// CHECK3: cond.end11:
-// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ]
+// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ]
// CHECK3-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2
// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]])
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4
-// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4
-// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4
-// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK3-NEXT: br label [[IFCONT7:%.*]]
// CHECK3: else6:
// CHECK3-NEXT: br label [[IFCONT7]]
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK3: then2:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK3-NEXT: br label [[IFCONT4:%.*]]
// CHECK3: else3:
// CHECK3-NEXT: br label [[IFCONT4]]
// CHECK3: ifcont4:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK3: then6:
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK3-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK3-NEXT: br label [[IFCONT8:%.*]]
// CHECK3: else7:
// CHECK3-NEXT: br label [[IFCONT8]]
// CHECK3: ifcont8:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK3: then10:
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK3-NEXT: br label [[IFCONT12:%.*]]
// CHECK3: else11:
// CHECK3-NEXT: br label [[IFCONT12]]
// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
-// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]])
-// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2
-// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32
-// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16
-// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]])
-// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
-// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1
-// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0
-// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
-// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2
-// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1
-// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0
-// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]]
-// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0
-// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]]
-// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]]
-// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]]
-// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
+// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
+// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
+// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
+// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
+// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1
+// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0
+// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]]
+// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
+// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]]
+// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
+// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1
-// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]]
-// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
+// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
+// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4
-// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4
-// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4
-// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4
-// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4
-// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4
-// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2
-// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
+// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
+// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
+// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2
+// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2
// CHECK3-NEXT: br label [[IFCONT7:%.*]]
// CHECK3: else6:
// CHECK3-NEXT: br label [[IFCONT7]]
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]]
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK3: then2:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
-// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
// CHECK3-NEXT: br label [[IFCONT4:%.*]]
// CHECK3: else3:
// CHECK3-NEXT: br label [[IFCONT4]]
// CHECK3: ifcont4:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
// CHECK3: then6:
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2
-// CHECK3-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
+// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
// CHECK3-NEXT: br label [[IFCONT8:%.*]]
// CHECK3: else7:
// CHECK3-NEXT: br label [[IFCONT8]]
// CHECK3: ifcont8:
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]]
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
// CHECK3: then10:
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2
-// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
+// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
// CHECK3-NEXT: br label [[IFCONT12:%.*]]
// CHECK3: else11:
// CHECK3-NEXT: br label [[IFCONT12]]
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2
-// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
+// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128
-// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128
-// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
+// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]]
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
+// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
- bool UseGenericStateMachine, bool) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
+ bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
- bool UseGenericStateMachine, bool) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
+ bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8
// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4
-// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP29]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP30]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP28]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP29]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK: omp_offload.failed5:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(ptr [[TMP18]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK: omp_offload.cont6:
-// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
-// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4
-// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP45]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP46]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP44]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP45]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]])
-// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8
+// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]])
+// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
// CHECK: omp_offload.failed11:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149(ptr [[AA]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]]
// CHECK: omp_offload.cont12:
-// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[RAA]], align 8
-// CHECK-NEXT: store ptr [[TMP52]], ptr [[_TMP13]], align 8
-// CHECK-NEXT: [[TMP53:%.*]] = load ptr, ptr [[_TMP13]], align 8
-// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP55]], align 8
-// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP56]], align 8
-// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP57]], align 8
-// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[RAA]], align 8
+// CHECK-NEXT: store ptr [[TMP51]], ptr [[_TMP13]], align 8
+// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[_TMP13]], align 8
+// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP53]], align 8
+// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP54]], align 8
+// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP55]], align 8
+// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
-// CHECK-NEXT: store i32 1, ptr [[TMP60]], align 4
-// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP61]], align 4
-// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2
-// CHECK-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8
-// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP59]], ptr [[TMP63]], align 8
-// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP64]], align 8
-// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP65]], align 8
-// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6
-// CHECK-NEXT: store ptr null, ptr [[TMP66]], align 8
-// CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP67]], align 8
-// CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP68]], align 8
-// CHECK-NEXT: [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]])
-// CHECK-NEXT: [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-NEXT: br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]]
+// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4
+// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP59]], align 4
+// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8
+// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8
+// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP62]], align 8
+// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP63]], align 8
+// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8
+// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP65]], align 8
+// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP66]], align 8
+// CHECK-NEXT: [[TMP67:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]])
+// CHECK-NEXT: [[TMP68:%.*]] = icmp ne i32 [[TMP67]], 0
+// CHECK-NEXT: br i1 [[TMP68]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]]
// CHECK: omp_offload.failed18:
-// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP53]]) #[[ATTR5]]
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP52]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT19]]
// CHECK: omp_offload.cont19:
-// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[H]], ptr [[TMP71]], align 8
-// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[H]], ptr [[TMP72]], align 8
-// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP73]], align 8
-// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[H]], ptr [[TMP69]], align 8
+// CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[H]], ptr [[TMP70]], align 8
+// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP71]], align 8
+// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
-// CHECK-NEXT: store i32 1, ptr [[TMP76]], align 4
-// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP77]], align 4
-// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2
-// CHECK-NEXT: store ptr [[TMP74]], ptr [[TMP78]], align 8
-// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP75]], ptr [[TMP79]], align 8
-// CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP80]], align 8
-// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP81]], align 8
-// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6
-// CHECK-NEXT: store ptr null, ptr [[TMP82]], align 8
-// CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP83]], align 8
-// CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP84]], align 8
-// CHECK-NEXT: [[TMP85:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]])
-// CHECK-NEXT: [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0
-// CHECK-NEXT: br i1 [[TMP86]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
+// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP74]], align 4
+// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP75]], align 4
+// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP72]], ptr [[TMP76]], align 8
+// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP73]], ptr [[TMP77]], align 8
+// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP78]], align 8
+// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP79]], align 8
+// CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP80]], align 8
+// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP81]], align 8
+// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP82]], align 8
+// CHECK-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]])
+// CHECK-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
+// CHECK-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
// CHECK: omp_offload.failed24:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(ptr [[H]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT25]]
// CHECK: omp_offload.cont25:
-// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[DA]], ptr [[TMP87]], align 8
-// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[DA]], ptr [[TMP88]], align 8
-// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP89]], align 8
-// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DA]], ptr [[TMP85]], align 8
+// CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DA]], ptr [[TMP86]], align 8
+// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP87]], align 8
+// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0
-// CHECK-NEXT: store i32 1, ptr [[TMP92]], align 4
-// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP93]], align 4
-// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2
-// CHECK-NEXT: store ptr [[TMP90]], ptr [[TMP94]], align 8
-// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP91]], ptr [[TMP95]], align 8
-// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP96]], align 8
-// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP97]], align 8
-// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6
-// CHECK-NEXT: store ptr null, ptr [[TMP98]], align 8
-// CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP99]], align 8
-// CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP100]], align 8
-// CHECK-NEXT: [[TMP101:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]])
-// CHECK-NEXT: [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK-NEXT: br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP90]], align 4
+// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP91]], align 4
+// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP88]], ptr [[TMP92]], align 8
+// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP89]], ptr [[TMP93]], align 8
+// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP94]], align 8
+// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP95]], align 8
+// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP96]], align 8
+// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP97]], align 8
+// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP98]], align 8
+// CHECK-NEXT: [[TMP99:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]])
+// CHECK-NEXT: [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
+// CHECK-NEXT: br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
// CHECK: omp_offload.failed30:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(ptr [[DA]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT31]]
// CHECK: omp_offload.cont31:
-// CHECK-NEXT: [[TMP103:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP103]])
+// CHECK-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP101]])
// CHECK-NEXT: [[CALL32:%.*]] = call noundef ptr @_Z5tmainIPiET_S1_(ptr noundef [[ARGC_ADDR]])
-// CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[CALL32]], align 4
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP104]]
+// CHECK-NEXT: [[TMP102:%.*]] = load i32, ptr [[CALL32]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP102]]
// CHECK-NEXT: ret i32 [[ADD]]
//
//
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8
// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4
-// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP29]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP30]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP28]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP29]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK: omp_offload.failed5:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125(ptr [[TMP18]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK: omp_offload.cont6:
-// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
-// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4
-// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP45]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP46]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP44]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP45]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]])
-// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8
+// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]])
+// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
// CHECK: omp_offload.failed11:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127(ptr [[AA]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]]
// CHECK: omp_offload.cont12:
-// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8
+// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8
-// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8
-// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8
+// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4
+// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4
-// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4
-// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8
+// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8
-// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8
-// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP61]], align 8
-// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP62]], align 8
-// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP60]], align 8
+// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP61]], align 8
+// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8
+// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8
-// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8
-// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8
-// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]])
-// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
-// CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8
+// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]])
+// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
// CHECK: omp_offload.failed17:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129(ptr [[H]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]]
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8
// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4
-// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.25, ptr [[TMP29]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP30]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.25, ptr [[TMP28]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP29]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK: omp_offload.failed5:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125(ptr [[TMP18]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK: omp_offload.cont6:
-// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
-// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4
-// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP45]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP46]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP44]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP45]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]])
-// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8
+// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]])
+// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
// CHECK: omp_offload.failed11:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127(ptr [[AA]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]]
// CHECK: omp_offload.cont12:
-// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8
+// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8
-// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8
-// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0
-// CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8
+// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4
+// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4
-// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
-// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4
-// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8
+// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8
-// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
-// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8
-// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
-// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP61]], align 8
-// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
-// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP62]], align 8
-// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP60]], align 8
+// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP61]], align 8
+// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8
+// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8
-// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
-// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8
-// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
-// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8
-// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]])
-// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
-// CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8
+// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]])
+// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
// CHECK: omp_offload.failed17:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129(ptr [[H]]) #[[ATTR5]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]]
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG48]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG48]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG83]]
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG85]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG85]]
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG88]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG88]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG91]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG91]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]]
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG93]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG93]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]]
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG96]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG96]]
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG98]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG98]]
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]]
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG98]]
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG98]]
// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]]
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG98]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG138]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]]
-// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]]
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG138]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG138]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]]
+// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]]
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG138]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG213]]
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG213]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG41]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG42]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG42]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG42]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP19]] to i1, !dbg [[DBG43]]
-// CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2, i1 true), !dbg [[DBG45:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]]
+// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !dbg [[DBG65]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG68]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1, !dbg [[DBG75:![0-9]+]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]]
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]]
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG89]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]]
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG92]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]]
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]]
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG97]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]]
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]]
-// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP25]] to i64, !dbg [[DBG100]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]]
// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]]
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP27]] to i1, !dbg [[DBG102]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]]
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]]
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP26]], !dbg [[DBG102]]
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]]
// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]]
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG65]]
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
-// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP31]], [[TMP32]], !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
+// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP11]]), !dbg [[DBG105:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]]
//
//
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG146]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG148:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG236]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG237]]
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG237]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2, i1 true), !dbg [[DBG238:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
-// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0
-// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]]
-// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9
-// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN10]]
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP39]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64
-// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64
-// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]]
-// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1
-// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
-// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64
+// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64
+// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1
+// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
+// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
+// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8
// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
-// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9
+// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]]
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]]
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP58]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]]
// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP64]], align 8
-// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP5]], align 8
-// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8
-// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
-// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
-// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
-// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP75]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8
-// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
-// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP79]], ptr [[TMP69]])
+// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP59]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP60]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[_TMP5]], align 8
+// CHECK1-NEXT: store ptr [[TMP62]], ptr [[TMP61]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
+// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP65]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP66]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[TMP67]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP68]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP65]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP69]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4
+// CHECK1-NEXT: [[TMP74:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP73]], ptr [[TMP65]])
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP81:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1
+// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP75]], 1
// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP83]])
+// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP77]])
+// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
+// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP79]], i32 1)
+// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP80]], align 8
+// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP81]], align 8
+// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to ptr
+// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP82]], align 8
// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
-// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP85]], i32 1)
-// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP86]], align 8
-// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP88]], align 8
-// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to ptr
-// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP89]], align 8
-// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP91]], align 4
-// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]]
+// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP87]], [[TMP88]]
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP97]]
+// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP89]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32
-// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32
+// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32
+// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP91]] to i32
// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]]
// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8
// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP89]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done22:
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP92]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP85]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP100]] monotonic, align 4
-// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP102]]
+// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP92]] monotonic, align 4
+// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP94]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]]
// CHECK1: omp.arraycpy.body24:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ]
-// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
-// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32
+// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
+// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP95]] to i32
// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: store i8 [[TMP104]], ptr [[_TMP28]], align 1
-// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[_TMP28]], align 1
-// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32
-// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
-// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32
+// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT: store i8 [[TMP96]], ptr [[_TMP28]], align 1
+// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[_TMP28]], align 1
+// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP97]] to i32
+// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
+// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP98]] to i32
// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]]
// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8
// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1
-// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0
-// CHECK1-NEXT: [[TMP110:%.*]] = extractvalue { i8, i1 } [[TMP108]], 1
-// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1
+// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0
+// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1
+// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP94]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]]
// CHECK1: omp.arraycpy.done36:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP111]])
+// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP103]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]])
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]]
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9
-// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]]
-// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
-// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1
-// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
-// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]])
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64
-// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]]
-// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
+// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1
+// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]])
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]]
// CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]]
+// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32
-// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32
+// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]]
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done5:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
-// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0
-// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]]
-// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9
-// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]]
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64
-// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
-// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]]
-// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1
-// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6
-// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]])
-// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP55]], align 8
-// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP56]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
-// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP61]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP63]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP65]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
-// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP61]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP67]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4
-// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP71]], ptr [[TMP61]])
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64
+// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
+// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1
+// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6
+// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 0, i32 2, ptr [[DOTRD_INPUT_]])
+// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP51]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: store ptr [[TMP54]], ptr [[TMP53]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
+// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP57]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP58]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP59]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP60]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP57]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP61]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP65]], ptr [[TMP57]])
+// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
+// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP68]], i32 0)
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP69]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP70]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP72:%.*]] = inttoptr i64 [[TMP11]] to ptr
+// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8
// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4
-// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP74]], i32 0)
-// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP75]], align 8
-// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8
-// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to ptr
-// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8
-// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
-// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP74]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP75]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]]
+// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP76]], [[TMP77]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP86]]
+// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP78]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32
-// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32
+// CHECK1-NEXT: [[TMP79:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP79]] to i32
+// CHECK1-NEXT: [[TMP80:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP80]] to i32
// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]]
// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8
// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP78]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done18:
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP74]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP89]] monotonic, align 4
-// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]]
+// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[TMP82:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP81]] monotonic, align 4
+// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP83]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]]
// CHECK1: omp.arraycpy.body20:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ]
-// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
-// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32
+// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
+// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP84]] to i32
// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: store i8 [[TMP93]], ptr [[_TMP24]], align 1
-// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[_TMP24]], align 1
-// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32
-// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
-// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32
+// CHECK1-NEXT: [[TMP85:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP90:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT: store i8 [[TMP85]], ptr [[_TMP24]], align 1
+// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[_TMP24]], align 1
+// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP86]] to i32
+// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
+// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP87]] to i32
// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]]
// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8
// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1
-// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0
-// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1
-// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP89:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP85]], i8 [[TMP88]] monotonic monotonic, align 1
+// CHECK1-NEXT: [[TMP90]] = extractvalue { i8, i1 } [[TMP89]], 0
+// CHECK1-NEXT: [[TMP91:%.*]] = extractvalue { i8, i1 } [[TMP89]], 1
+// CHECK1-NEXT: br i1 [[TMP91]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP83]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]]
// CHECK1: omp.arraycpy.done32:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP100]])
+// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP92]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]])
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]]
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9
-// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]]
-// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
-// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1
-// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
-// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]])
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64
-// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]]
-// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
+// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1
+// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]])
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]]
// CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]]
+// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32
-// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32
+// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]]
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done5:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK1-NEXT: ret i32 [[TMP18]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK1-NEXT: ret i32 [[TMP16]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK3-NEXT: ret i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK3-NEXT: ret i32 [[TMP16]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false)
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8
+// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8
-// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8
-// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8
+// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8
+// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8
+// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8
+// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8
+// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8
+// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8
+// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8
-// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8
-// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8
-// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8
-// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8
-// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8
-// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4
-// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4
-// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4
+// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4
+// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0
+// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4
-// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4
-// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8
-// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8
-// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8
-// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8
-// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8
-// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8
-// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
-// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4
+// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8
+// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8
+// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8
+// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8
+// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8
+// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
+// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
-// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]])
+// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]])
// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]])
-// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK9-NEXT: ret i32 [[TMP60]]
+// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]])
+// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK9-NEXT: ret i32 [[TMP49]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0
-// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
-// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
-// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64
-// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]]
-// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
-// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4
+// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0
+// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]]
+// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0
-// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
-// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
-// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
-// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]]
-// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0
-// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
-// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
-// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64
-// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]]
-// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]]
-// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
-// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
-// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4
-// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I13]], align 4
+// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0
+// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]]
+// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0
+// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]]
+// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64
// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]]
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]]
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4
-// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64
-// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1
-// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1
+// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false)
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4
+// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4
+// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4
-// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4
-// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4
-// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4
+// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4
+// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4
+// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4
+// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4
+// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4
+// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4
+// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4
+// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4
-// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4
-// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4
-// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4
-// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4
-// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4
-// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4
-// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4
-// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4
-// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4
-// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
+// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4
+// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4
+// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
+// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4
-// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4
-// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4
-// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4
-// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4
-// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4
-// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4
-// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4
-// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8
-// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
-// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4
+// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4
+// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4
+// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4
+// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4
+// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4
+// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8
+// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
-// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]])
+// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]])
// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
-// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]])
-// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK11-NEXT: ret i32 [[TMP59]]
+// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
+// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]])
+// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK11-NEXT: ret i32 [[TMP48]]
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK1-NEXT: ret i32 [[TMP18]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK1-NEXT: ret i32 [[TMP16]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK3-NEXT: ret i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK3-NEXT: ret i32 [[TMP16]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false)
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8
+// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8
-// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8
-// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8
+// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8
+// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8
+// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8
+// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8
+// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8
+// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8
+// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8
-// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8
-// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8
-// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8
-// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8
-// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8
-// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4
-// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4
-// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4
+// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4
+// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0
+// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4
-// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4
-// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8
-// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8
-// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8
-// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8
-// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8
-// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8
-// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
-// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4
+// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8
+// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8
+// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8
+// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8
+// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8
+// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
+// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
-// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]])
+// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]])
// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]])
-// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK9-NEXT: ret i32 [[TMP60]]
+// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]])
+// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK9-NEXT: ret i32 [[TMP49]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8
// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0
-// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
-// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
-// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64
-// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]]
-// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
-// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4
+// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0
+// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]]
+// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0
-// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
-// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
-// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
-// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]]
-// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0
-// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
-// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
-// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64
-// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]]
-// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]]
-// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
-// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
-// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0
+// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]]
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0
+// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]]
+// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64
// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]]
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]]
-// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4
-// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4
+// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64
+// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1
-// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1
+// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false)
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4
+// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4
+// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4
-// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4
-// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4
-// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4
+// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4
+// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4
+// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4
+// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4
+// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4
+// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4
+// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4
+// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4
-// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4
-// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4
-// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4
-// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4
-// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4
-// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4
-// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4
-// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4
-// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4
-// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
+// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4
+// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4
+// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
+// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4
-// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4
-// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4
-// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4
-// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4
-// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4
-// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4
-// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4
-// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8
-// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
-// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4
+// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4
+// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4
+// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4
+// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4
+// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4
+// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8
+// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
-// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]])
+// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]])
// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
-// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]])
-// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK11-NEXT: ret i32 [[TMP59]]
+// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
+// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]])
+// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK11-NEXT: ret i32 [[TMP48]]
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP22]] to i1
-// CHECK1-NEXT: [[TMP23:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1
+// CHECK1-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
-// CHECK1-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT: store i64 100, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP21]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
+// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
// CHECK1: omp_offload.failed7:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr @Arg, align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP35]])
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr @Arg, align 4
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP33]])
// CHECK1-NEXT: ret i32 [[CALL]]
//
//
// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP21]] to i1
-// CHECK1-NEXT: [[TMP22:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1
+// CHECK1-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
-// CHECK1-NEXT: store i64 100, ptr [[TMP31]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
-// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
-// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP21]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT: store i64 100, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP20]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK1-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK1: omp_offload.failed6:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]]
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
-// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8
-// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK5-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK5-NEXT: ret void
//
//
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK5-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
-// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0
-// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]]
-// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9
-// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]]
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64
-// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
-// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]]
-// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1
-// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6
-// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
-// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64
+// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
+// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1
+// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6
+// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
+// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
// CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9
+// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]]
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]]
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP62]], i64 [[TMP63]], ptr [[ARGC1]], ptr [[TMP64]])
+// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP58]], i64 [[TMP59]], ptr [[ARGC1]], ptr [[TMP60]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP61]], [[TMP62]]
// CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP68]])
-// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4
-// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP70]], i32 1)
-// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP71]], align 8
-// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8
-// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to ptr
-// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8
-// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4
-// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP64]])
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
+// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP66]], i32 1)
+// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP67]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP68]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP11]] to ptr
+// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
+// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]]
+// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP74]], [[TMP75]]
// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP82]]
+// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP76]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32
-// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32
+// CHECK1-NEXT: [[TMP77:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP77]] to i32
+// CHECK1-NEXT: [[TMP78:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP78]] to i32
// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]]
// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8
// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP76]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done21:
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP72]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP85]] monotonic, align 4
-// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP87]]
+// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[TMP80:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP79]] monotonic, align 4
+// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP81]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]]
// CHECK1: omp.arraycpy.body23:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ]
-// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1
-// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32
+// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1
+// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP82]] to i32
// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: store i8 [[TMP89]], ptr [[_TMP27]], align 1
-// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[_TMP27]], align 1
-// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32
-// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1
-// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32
+// CHECK1-NEXT: [[TMP83:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP88:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT: store i8 [[TMP83]], ptr [[_TMP27]], align 1
+// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[_TMP27]], align 1
+// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP84]] to i32
+// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1
+// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP85]] to i32
// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]]
// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8
// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1
-// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0
-// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1
-// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP87:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP83]], i8 [[TMP86]] monotonic monotonic, align 1
+// CHECK1-NEXT: [[TMP88]] = extractvalue { i8, i1 } [[TMP87]], 0
+// CHECK1-NEXT: [[TMP89:%.*]] = extractvalue { i8, i1 } [[TMP87]], 1
+// CHECK1-NEXT: br i1 [[TMP89]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP81]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]]
// CHECK1: omp.arraycpy.done35:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP96:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP96]])
+// CHECK1-NEXT: [[TMP90:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP90]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP31]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
-// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 0
-// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 0
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]]
-// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 9
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 [[LB_ADD_LEN10]]
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 8
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64
-// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64
-// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1
-// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
-// CHECK1-NEXT: store i32 1, ptr [[TMP52]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]]
+// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9
+// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64
+// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = add nuw i64 [[TMP42]], 1
+// CHECK1-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP44]], ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
+// CHECK1-NEXT: store i32 1, ptr [[TMP49]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP51]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
+// CHECK1-NEXT: store ptr [[TMP52]], ptr [[DOTTASK_RED_]], align 8
// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4
-// CHECK1-NEXT: [[TMP56:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, ptr [[DOTRD_INPUT_]])
-// CHECK1-NEXT: store ptr [[TMP56]], ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9
+// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP54]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP55]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP56]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// CHECK1-NEXT: store i64 [[TMP57]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP58]], [[TMP59]]
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1
+// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP60]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]]
// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8
-// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP65]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP66]], align 8
-// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP5]], align 8
-// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8
-// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4
-// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
-// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
-// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8
-// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
-// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP81]], ptr [[TMP71]])
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[_TMP5]], align 8
+// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
+// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
+// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
+// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4
+// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP67]])
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1
+// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP77]], 1
// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP85]])
+// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]])
+// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
+// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1)
+// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8
+// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8
+// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP13]] to ptr
+// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8
// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4
-// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP87]], i32 1)
-// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8
-// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8
-// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to ptr
-// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8
-// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4
-// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]]
+// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP89]], [[TMP90]]
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]]
+// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32
-// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32
+// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32
+// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP93]] to i32
// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]]
// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8
// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP91]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done22:
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4
-// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]]
+// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4
+// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4
+// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]]
// CHECK1: omp.arraycpy.body24:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ]
-// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
-// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32
+// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
+// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32
// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP28]], align 1
-// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP28]], align 1
-// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32
-// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
-// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32
+// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP28]], align 1
+// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP28]], align 1
+// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP99]] to i32
+// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1
+// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP100]] to i32
// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]]
// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8
// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1
-// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0
-// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1
-// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1
+// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0
+// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1
+// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP96]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]]
// CHECK1: omp.arraycpy.done36:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]])
+// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]])
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]]
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9
-// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]]
-// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
-// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1
-// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
-// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]])
-// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64
-// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64
-// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]]
-// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
+// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1
+// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]])
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]]
// CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]]
+// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32
-// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32
+// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]]
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done5:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]]
+// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32
-// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32
+// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]]
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done5:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK1-NEXT: ret i32 [[TMP18]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK1-NEXT: ret i32 [[TMP16]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !4
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK3-NEXT: ret i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK3-NEXT: ret i32 [[TMP16]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !5
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456
// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]]
// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]]
-// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456
// CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]]
// CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64
// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64
// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
-// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 123, ptr [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456
// CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]]
// CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]]
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]]
-// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 123, ptr [[I]], align 4
// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false)
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8
+// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8
-// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8
-// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8
+// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8
+// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8
+// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8
+// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8
+// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8
+// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8
+// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8
-// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8
-// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8
-// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8
-// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8
-// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8
-// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4
-// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4
-// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4
+// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4
+// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0
+// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4
-// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4
-// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8
-// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8
-// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8
-// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8
-// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8
-// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8
-// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
-// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4
+// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8
+// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8
+// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8
+// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8
+// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8
+// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
+// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
-// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]])
+// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]])
// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]])
-// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK9-NEXT: ret i32 [[TMP60]]
+// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]])
+// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK9-NEXT: ret i32 [[TMP49]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8
// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !5
+// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5
+// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
+// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP31]], 0
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0
+// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1
+// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]]
+// CHECK9-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0
// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1
// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]]
-// CHECK9-NEXT: store i32 [[ADD22]], ptr [[I13]], align 4
-// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP32]], 0
-// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
-// CHECK9-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV24]], 1
-// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK9-NEXT: store i32 [[ADD26]], ptr [[J14]], align 4
+// CHECK9-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0
-// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
-// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
-// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64
-// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]]
-// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
-// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0
-// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
-// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
-// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
-// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]]
-// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0
-// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
-// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
-// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64
-// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]]
-// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]]
-// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
-// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
-// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !9
+// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0
+// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]]
+// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0
+// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]]
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0
+// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]]
+// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64
// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]]
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]]
-// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !9
-// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !9
+// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64
+// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1
-// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9
+// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1
+// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP35]], 0
-// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1
-// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1
-// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]]
-// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4
-// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP36]], 0
-// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1
-// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1
-// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]]
-// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4
+// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0
+// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
+// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1
+// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]]
+// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4
+// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
+// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1
+// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]]
+// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !14
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]]
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2
// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false)
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4
+// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4
+// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4
-// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4
-// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4
-// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4
+// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4
+// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4
+// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4
+// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4
+// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4
+// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4
+// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4
+// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4
-// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4
-// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4
-// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4
-// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4
-// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4
-// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4
-// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4
-// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4
-// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4
-// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
+// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4
+// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4
+// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
+// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4
-// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4
-// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4
-// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4
-// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4
-// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4
-// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4
-// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4
-// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8
-// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
-// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4
+// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4
+// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4
+// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4
+// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4
+// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4
+// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8
+// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
-// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]])
+// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]])
// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
-// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]])
-// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK11-NEXT: ret i32 [[TMP59]]
+// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
+// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]])
+// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK11-NEXT: ret i32 [[TMP48]]
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
-// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !6
+// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]]
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !10
+// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0
// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0
// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]]
-// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0
// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !10
-// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]]
// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]]
-// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]]
-// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group !10
+// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
+// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1
-// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10
+// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !15
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
+// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2
// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]]
// CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]]
-// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]]
-// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]]
// CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0
// CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
// CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]]
-// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0
// CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
// CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64
// CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]]
-// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64
// CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]]
-// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1
-// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2
// CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2
// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]]
// CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64
// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64
// CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]]
-// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 10, ptr [[I]], align 4
// CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]]
// CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
// CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0
// CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]]
-// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
// CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]]
// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]]
-// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]]
-// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1
-// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2
// CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2
// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]]
// CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]]
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]]
-// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 10, ptr [[I]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group [[ACC_GRP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15
+// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group [[ACC_GRP15]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP21]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: store i32 1000, ptr [[I]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV5]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK3: omp.inner.for.cond7:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK3: omp.inner.for.body9:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK3: omp.body.continue12:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK3: omp.inner.for.inc13:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp.inner.for.end15:
// CHECK3-NEXT: store i32 1000, ptr [[I6]], align 4
// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: store i32 1000, ptr [[I]], align 4
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !5
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group !14
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group !6
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
-// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group !15
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP15]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !4
+// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: .omp.final.done:
// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8
-// CHECK5-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK5-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4
+// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]]
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]]
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]]
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group !8
-// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8
+// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]]
+// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8
+// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: .omp.final.done:
// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8
-// CHECK5-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
+// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK5-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4
+// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4
+// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK5-NEXT: ret void
//
//
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK5-NEXT: ret void
//
//
// CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
-// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 2, ptr [[I]], align 4
// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
-// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 2, ptr [[I]], align 4
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4
// CHECK7-NEXT: ret i32 0
//
// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
-// CHECK9-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: store i32 2, ptr [[I]], align 4
// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: store i32 0, ptr [[T_VAR1]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
-// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: store i32 2, ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4
// CHECK9-NEXT: ret i32 0
//
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
-// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK9-NEXT: ]
// CHECK9: .omp.reduction.case1:
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4
+// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4
// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.default:
// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK9-NEXT: ret void
//
//
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK1-NEXT: ret i32 [[TMP18]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK1-NEXT: ret i32 [[TMP16]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
-// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0
// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
-// CHECK3-NEXT: ret i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+// CHECK3-NEXT: ret i32 [[TMP16]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]]
-// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456
// CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]]
// CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64
// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64
// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
-// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 123, ptr [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456
// CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
// CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]]
// CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]]
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]]
-// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 123, ptr [[I]], align 4
// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false)
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8
+// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8
+// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8
-// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8
-// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8
+// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8
+// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8
+// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8
+// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8
+// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8
+// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8
+// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8
-// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8
-// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8
-// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8
-// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8
-// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8
-// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8
-// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4
-// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4
-// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4
+// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4
+// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0
+// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4
-// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4
-// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8
-// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8
-// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8
-// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8
-// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8
-// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8
-// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8
-// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
-// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4
+// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8
+// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8
+// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8
+// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8
+// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8
+// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
+// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
-// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]])
+// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]])
// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]])
-// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK9-NEXT: ret i32 [[TMP60]]
+// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]])
+// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK9-NEXT: ret i32 [[TMP49]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]]
-// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]]
+// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
-// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]]
+// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
-// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
+// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0
-// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
-// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
-// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64
-// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]]
-// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
-// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0
-// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
-// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
-// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
-// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]]
-// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0
-// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
-// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
-// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64
-// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]]
-// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]]
-// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
-// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
-// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !5
+// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0
+// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]]
+// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0
+// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]]
+// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0
+// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]]
+// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64
// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]]
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]]
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !5
-// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64
-// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !5
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1
-// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5
+// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1
+// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP33]], 0
-// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1
-// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1
-// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]]
-// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4
-// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
-// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP34]], 0
-// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1
-// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1
-// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]]
-// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4
+// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP33]], 0
+// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
+// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1
+// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]]
+// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP34]], 0
+// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
+// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1
+// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]]
+// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8
+// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
-// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2
// CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !11
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false)
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4
+// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4
+// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4
-// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4
-// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4
-// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4
+// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4
+// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4
+// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4
+// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4
+// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4
+// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4
+// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4
+// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4
-// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4
-// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4
-// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4
-// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4
-// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4
-// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4
-// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4
-// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4
-// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4
-// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
+// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4
+// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4
+// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
+// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0
// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
+// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4
-// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4
-// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4
-// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4
-// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4
-// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4
-// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4
-// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4
-// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8
-// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
-// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4
+// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4
+// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4
+// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4
+// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4
+// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4
+// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8
+// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
-// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]])
+// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]])
// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
-// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
-// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]])
-// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK11-NEXT: ret i32 [[TMP59]]
+// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
+// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]])
+// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK11-NEXT: ret i32 [[TMP48]]
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]]
// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0
// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]]
-// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0
// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
-// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !6
-// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]]
// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]]
-// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]]
-// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !6
+// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1
-// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6
+// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4
-// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8
-// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
-// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4
+// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8
+// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]]
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2
// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !12
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]]
-// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]]
// CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0
// CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
// CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]]
-// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0
// CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
// CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2
-// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64
// CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]]
-// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64
// CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]]
-// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2
+// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1
-// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2
+// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]]
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2
// CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2
// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]]
// CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64
// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64
// CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]]
-// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 10, ptr [[I]], align 4
// CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]]
// CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
// CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0
// CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]]
-// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
// CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3
-// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]]
// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]]
-// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]]
-// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3
+// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1
-// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3
+// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2
// CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2
// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]]
// CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]]
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]]
-// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 10, ptr [[I]], align 4
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
-// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
-// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 2, ptr [[I]], align 4
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
-// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 2, ptr [[I]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4
// CHECK5-NEXT: ret i32 0
//
// CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
-// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3
+// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 2, ptr [[I]], align 4
// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4
// CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
-// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 2, ptr [[I]], align 4
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4
// CHECK7-NEXT: ret i32 0
//
// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
-// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group !4
-// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4
+// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]]
+// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
+// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK9-NEXT: ]
// CHECK9: .omp.reduction.case1:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4
+// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4
// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.default:
// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
// CHECK9-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: store i32 0, ptr [[Y2]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8
-// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8
-// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8
-// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.22, ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
+// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK1: omp_offload.failed5:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done6:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK1: omp.arraycpy.body8:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK1: omp.arraycpy.done14:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done2:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done6:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK1: omp.arraycpy.body8:
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK1: omp.arraycpy.done14:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK1: omp.arraycpy.done2:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8
-// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8
-// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8
-// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
-// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP49]], align 8
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP50]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8
-// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
+// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK1: omp_offload.failed5:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16
-// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16
+// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16
// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0)
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
-// CHECK1-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16
-// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16
-// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16
+// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16
+// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16
// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0)
// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16
-// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16
+// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16
+// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16
-// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16
+// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16
// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0)
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
-// CHECK1-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16
-// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16
-// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16
+// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16
+// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16
// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0)
// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16
-// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16
+// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16
+// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16
// CHECK1-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: store i32 0, ptr [[Y2]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP17]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT: store i32 3, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP25]], align 8
-// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK3-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8
+// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
-// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP28]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP30]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0
-// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP33]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP35]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1
-// CHECK3-NEXT: store ptr null, ptr [[TMP37]], align 4
-// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP38]], align 4
-// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[X]], ptr [[TMP40]], align 4
-// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2
-// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4
-// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP23]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[X]], ptr [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2
+// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
-// CHECK3-NEXT: store i32 1, ptr [[TMP45]], align 4
-// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
-// CHECK3-NEXT: store i32 3, ptr [[TMP46]], align 4
-// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
-// CHECK3-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 4
-// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
-// CHECK3-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 4
-// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
-// CHECK3-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 4
-// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
-// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 4
-// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
-// CHECK3-NEXT: store ptr null, ptr [[TMP51]], align 4
-// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
-// CHECK3-NEXT: store ptr null, ptr [[TMP52]], align 4
-// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
-// CHECK3-NEXT: store i64 0, ptr [[TMP53]], align 8
-// CHECK3-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]])
-// CHECK3-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK3-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CHECK3-NEXT: store i32 1, ptr [[TMP33]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, ptr [[TMP34]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.22, ptr [[TMP37]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP41]], align 8
+// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]])
+// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
+// CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
// CHECK3: omp_offload.failed5:
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT6]]
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false)
// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK3: omp.arrayinit.body:
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK3: omp.arrayinit.done:
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK3: omp.arraycpy.body:
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK3: omp.arraycpy.done6:
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK3: omp.arraycpy.body8:
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK3: omp.arraycpy.done14:
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK3: omp.arraycpy.body:
-// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK3: omp.arraycpy.done2:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false)
// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK3: omp.arrayinit.body:
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK3: omp.arrayinit.done:
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK3: omp.arraycpy.body:
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK3: omp.arraycpy.done6:
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK3: omp.arraycpy.body8:
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK3: omp.arraycpy.done14:
-// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99
-// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99
+// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK3: omp.arraycpy.body:
-// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK3: omp.arraycpy.done2:
// CHECK3-NEXT: ret void
// CHECK5-NEXT: store i32 0, ptr [[Y2]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4
-// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK5-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
-// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
-// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4
// CHECK5-NEXT: ret void
//
//
// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK5: omp.arrayinit.body:
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK5: omp.arrayinit.done:
-// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8
-// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK5: omp.arraycpy.body:
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK5: omp.arraycpy.done6:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK5: omp.arraycpy.body8:
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK5: omp.arraycpy.done14:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK5: omp.arraycpy.body:
-// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK5: omp.arraycpy.done2:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK5: omp.arrayinit.body:
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK5: omp.arrayinit.done:
-// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8
-// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK5: omp.arraycpy.body:
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK5: omp.arraycpy.done6:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK5: omp.arraycpy.body8:
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK5: omp.arraycpy.done14:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99
-// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99
+// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK5: omp.arraycpy.body:
-// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK5: omp.arraycpy.done2:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16
// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16
-// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]]
+// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16
+// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]]
// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16
// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]]
// CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK5: atomic_cont:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
-// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16
-// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16
-// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]]
+// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16
+// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16
+// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]]
// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16
// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]]
// CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK5: atomic_exit:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16
-// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16
+// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16
+// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16
// CHECK5-NEXT: ret void
//
//
// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16
// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var)
-// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT: ]
// CHECK5: .omp.reduction.case1:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16
-// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]]
+// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16
+// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]]
// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.case2:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16
// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]]
// CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK5: atomic_cont:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
-// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16
-// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16
-// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16
-// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]]
+// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16
+// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16
+// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16
+// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]]
// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16
// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]]
// CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK5: atomic_exit:
-// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK5: .omp.reduction.default:
// CHECK5-NEXT: ret void
// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16
-// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]]
-// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16
+// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16
+// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]]
+// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16
// CHECK5-NEXT: ret void
//
//
// CHECK7-NEXT: store i32 0, ptr [[Y2]], align 4
// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK7-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4
-// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK7-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 4
-// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK7-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK7-NEXT: ]
// CHECK7: .omp.reduction.case1:
-// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK7-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.case2:
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4
-// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK7-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.default:
// CHECK7-NEXT: ret void
// CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
-// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
-// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1
-// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
-// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4
-// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4
+// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1
+// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
+// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4
// CHECK7-NEXT: ret void
//
//
// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4
// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false)
// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK7: omp.arrayinit.body:
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK7: omp.arrayinit.done:
-// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4
-// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var)
-// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK7-NEXT: ]
// CHECK7: .omp.reduction.case1:
-// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK7: omp.arraycpy.body:
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK7: omp.arraycpy.done6:
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.case2:
-// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK7: omp.arraycpy.body8:
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK7: omp.arraycpy.done14:
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.default:
// CHECK7-NEXT: ret void
// CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK7: omp.arraycpy.body:
-// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK7: omp.arraycpy.done2:
// CHECK7-NEXT: ret void
// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4
// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false)
// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
+// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK7: omp.arrayinit.body:
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK7: omp.arrayinit.done:
-// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4
-// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
-// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK7-NEXT: ]
// CHECK7: .omp.reduction.case1:
-// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]]
+// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK7: omp.arraycpy.body:
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
// CHECK7: omp.arraycpy.done6:
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.case2:
-// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]]
+// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
// CHECK7: omp.arraycpy.body8:
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4
+// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
// CHECK7: omp.arraycpy.done14:
-// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7: .omp.reduction.default:
// CHECK7-NEXT: ret void
// CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
// CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
+// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
+// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99
-// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]]
+// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99
+// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK7: omp.arraycpy.body:
-// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// CHECK7: omp.arraycpy.done2:
// CHECK7-NEXT: ret void
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
- bool RequiresFullRuntime);
+ InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
- bool RequiresFullRuntime);
+ void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
///}
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
-__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
+__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1)
+__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
- bool RequiresFullRuntime) {
+OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
if (!updateToLocation(Loc))
return Loc.IP;
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
- ConstantInt *RequiresFullRuntimeVal =
- ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
CallInst *ThreadKind = Builder.CreateCall(
- Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+ Fn, {Ident, IsSPMDVal, UseGenericStateMachine});
Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
}
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
- bool IsSPMD,
- bool RequiresFullRuntime) {
+ bool IsSPMD) {
if (!updateToLocation(Loc))
return;
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
- ConstantInt *RequiresFullRuntimeVal =
- ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
- Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
+ Builder.CreateCall(Fn, {Ident, IsSPMDVal});
}
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
if (!UV || UV->size() + (ReplVal != nullptr) < 2)
return false;
- LLVM_DEBUG(
+F.dump();
+ (
dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") << "\n");
constexpr const int InitModeArgNo = 1;
constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
- constexpr const int InitRequiresFullRuntimeArgNo = 3;
- constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
ModeSimplifyCB);
- A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB,
- InitRequiresFullRuntimeArgNo),
- IsGenericModeSimplifyCB);
- A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelDeinitCB,
- DeinitRequiresFullRuntimeArgNo),
- IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
ConstantInt *ModeArg =
const int InitModeArgNo = 1;
const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
- const int InitRequiresFullRuntimeArgNo = 3;
- const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
- A.changeUseAfterManifest(
- KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
- *ConstantInt::getBool(Ctx, false));
- A.changeUseAfterManifest(
- KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
- *ConstantInt::getBool(Ctx, false));
++NumOpenMPTargetRegionKernelsSPMD;
; CHECK: Function Attrs: convergent norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: store i8 1, i8* @G, align 1
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
store i8 %isSPMD, i8* @G
call void @bar() #2
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
declare i8 @__kmpc_is_spmd_exec_mode()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
;
;
+; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
+; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-NEXT: ret i32 0
+;
+;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
;
;
+; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
+; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-NEXT: ret i32 0
+;
+;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
;
;
+; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-DISABLED-NEXT: ret i32 0
+;
+;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
;
;
+; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-DISABLED-NEXT: ret i32 0
+;
+;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
}
; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO.
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1);
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1);
define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
entry:
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
%2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
ret void
worker.exit: ; preds = %entry
ret void
}
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
-; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
+; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false)
; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK-DISABLED: if.then:
; CHECK-DISABLED-NEXT: call void @bar()
; CHECK-DISABLED-NEXT: br label [[IF_END]]
; CHECK-DISABLED: if.end:
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
- %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false)
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
call void @bar()
br label %if.end
if.end:
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
declare i32 @__kmpc_get_hardware_thread_id()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK-NEXT: ret void
; CHECK: main.thread.user_code:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @helper1()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
ret i32 %ret
}
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) #1
+declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) #1
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
ret void
}
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @helper1()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
call void @helper1()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}
define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}
}
declare i32 @__kmpc_get_hardware_num_threads_in_block()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1
+declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext) #1
!llvm.module.flags = !{!0, !1}
define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
entry:
- %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false, i1 false) #0
+ %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
region.barrier:
tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2)
- tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
+ tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2) #0
br label %common.ret
}
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
-; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR1]]
+; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
+; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
+;
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
+; CHECK-NEXT: store double [[DIV]], double* @_ZL6Device, align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT: ret void
+;
define void @foo() {
entry:
- %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true)
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
%x_on_stack = bitcast i8* %0 to i32*
%1 = bitcast i32* %x_on_stack to i8*
call void @share(i8* %1), !dbg !10
call void @__kmpc_free_shared(i8* %0)
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}
declare void @__kmpc_free_shared(i8* nocapture)
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1);
+declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1);
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i1)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr)
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
ret void
}
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
;.
define weak void @is_spmd() {
; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
define weak void @non_spmd() {
; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
define weak void @will_not_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext)
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext)
+declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8)
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
declare void @foo()
;.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @spmd_helper()
call void @mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}
define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}
declare void @foo()
declare void @bar()
declare i8 @__kmpc_parallel_level()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1
!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}
%struct.ident_t = type { i32, i32, i32, i32, i8* }
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
- ret i32 0
-}
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
-
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLED-NEXT: ret i32 0
+;
+ ret i32 0
+}
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR0]]
; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-NEXT: call void @unknown_no_openmp()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true)
call void @foo()
call void @bar()
call void @convert_and_move_alloca()
call void @unknown_no_openmp()
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
ret void
}
; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: !9)
;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-REMARKS: {{.*}}
define dso_local void @foo() "kernel" {
entry:
- %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
%x_on_stack = bitcast i8* %x to i32*
%0 = bitcast i32* %x_on_stack to i8*
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %x, i64 4)
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
}
define void @bar() "kernel" {
- %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
br i1 %cmp, label %master1, label %exit
call void @__kmpc_free_shared(i8* %y, i64 4)
br label %exit
exit:
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
}
define void @baz_spmd() "kernel" {
- %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
br i1 %c0, label %master3, label %exit
call void @__kmpc_free_shared(i8* %z, i64 24)
br label %exit
exit:
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
ret void
}
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
;
;
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]]
; CHECK-NEXT: ret i8* [[GEP]]
;
+;
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" }
; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) }
; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2)
;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-LIMIT: {{.*}}
+; CHECK-REMARKS: {{.*}}
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
define void @kernel() {
- %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false)
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
if.else:
br label %if.end
if.end:
- call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}
declare void @__kmpc_kernel_prepare_parallel(i8*)
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
attributes #0 = { cold noinline }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store i32 0, i32* %.zero.addr, align 4
store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store i32 0, i32* %.zero.addr, align 4
store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store i32 0, i32* %.zero.addr, align 4
store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store i32 0, i32* %.zero.addr, align 4
store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store i32 0, i32* %.zero.addr, align 4
store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
%4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2)
%5 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0)
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
br label %common.ret
}
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
+; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-NEXT: ret i32 0
+;
+; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
+; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-NEXT: ret i32 0
+;
+; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-DISABLED-NEXT: ret i32 0
+;
+; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-DISABLED-NEXT: ret i32 0
+;
ret i32 0
}
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
; Function Attrs: alwaysinline convergent norecurse nounwind
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK: region.exit:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
store double %call.i, double* %x, align 8, !tbaa !8
%2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3
br label %common.ret
}
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5
; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function.
;
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
-; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false, i1 false)
+; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false)
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
entry:
%ng1 = alloca i32, align 4
%captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5)
- %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
%captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7
call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2)
- call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true)
+ call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1)
br label %common.ret
common.ret: ; preds = %user_code.entry, %entry
}
; Function Attrs: convergent nounwind
-define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine, i1 noundef zeroext %0) local_unnamed_addr #9 {
+define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
entry:
+ %0 = and i32 undef, undef
%1 = and i8 %Mode, 2
%tobool.not = icmp eq i8 %1, 0
br i1 %tobool.not, label %if.else, label %if.then
}
; Function Attrs: nounwind
-define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %0) local_unnamed_addr #10 {
+define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode) local_unnamed_addr #10 {
ret void
}
; CHECK-NEXT: entry:
; CHECK-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR6]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
; CHECK-DISABLED-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false, i1 true) #[[ATTR6:[0-9]+]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: is_worker_check:
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 true) #[[ATTR6]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1) #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
entry:
%N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
%call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3
ret void
worker.exit: ; preds = %entry
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLED-NEXT: ret i32 0
+;
ret i32 0
}
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.experimental.noalias.scope.decl(metadata) #4
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @spmd_helper() #5
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-DISABLE-SPMDIZATION-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0
+;
ret i32 0
}
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
; Function Attrs: convergent noinline norecurse nounwind
define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @generic_helper() #[[ATTR5]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @generic_helper() #5
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
worker.exit: ; preds = %entry
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
+define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @spmd_amenable()
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}
int8_t __kmpc_is_spmd_exec_mode();
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
- bool UseGenericStateMachine, bool);
+ bool UseGenericStateMachine);
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool);
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode);
///}
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
- bool UseGenericStateMachine, bool) {
+ bool UseGenericStateMachine) {
FunctionTracingRAII();
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD) {
///
/// \param Ident Source location identification, can be NULL.
///
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) {
FunctionTracingRAII();
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
state::assumeInitialState(IsSPMD);