#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <cstdint>
#include <sstream>
#define DEBUG_TYPE "openmp-ir-builder"
return GV;
}
-Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
- uint32_t SrcLocStrSize,
- IdentFlag LocFlags,
- unsigned Reserve2Flags) {
+Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
+ IdentFlag LocFlags,
+ unsigned Reserve2Flags) {
// Enable "C-mode".
LocFlags |= OMP_IDENT_FLAG_KMPC;
- Constant *&Ident =
+ Value *&Ident =
IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
if (!Ident) {
Constant *I32Null = ConstantInt::getNullValue(Int32);
- Constant *IdentData[] = {I32Null,
- ConstantInt::get(Int32, uint32_t(LocFlags)),
- ConstantInt::get(Int32, Reserve2Flags),
- ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
+ Constant *IdentData[] = {
+ I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
+ ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
Constant *Initializer =
ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
}
}
- return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
+ return Builder.CreatePointerCast(Ident, IdentPtr);
}
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
- uint32_t &SrcLocStrSize) {
- SrcLocStrSize = LocStr.size();
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
Constant *&SrcLocStr = SrcLocStrMap[LocStr];
if (!SrcLocStr) {
Constant *Initializer =
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
StringRef FileName,
- unsigned Line, unsigned Column,
- uint32_t &SrcLocStrSize) {
+ unsigned Line,
+ unsigned Column) {
SmallString<128> Buffer;
Buffer.push_back(';');
Buffer.append(FileName);
Buffer.append(std::to_string(Column));
Buffer.push_back(';');
Buffer.push_back(';');
- return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
+ return getOrCreateSrcLocStr(Buffer.str());
}
-Constant *
-OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
- StringRef UnknownLoc = ";unknown;unknown;0;0;;";
- return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
+Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
+ return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
- uint32_t &SrcLocStrSize,
- Function *F) {
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
DILocation *DIL = DL.get();
if (!DIL)
- return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
+ return getOrCreateDefaultSrcLocStr();
StringRef FileName = M.getName();
if (DIFile *DIF = DIL->getFile())
if (Optional<StringRef> Source = DIF->getSource())
if (Function.empty() && F)
Function = F->getName();
return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
- DIL->getColumn(), SrcLocStrSize);
+ DIL->getColumn());
}
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
- uint32_t &SrcLocStrSize) {
- return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
- Loc.IP.getBlock()->getParent());
+Constant *
+OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
+ return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
}
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
break;
}
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Args[] = {
- getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
- getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
+ getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
// If we are in a cancellable parallel region, barriers are cancellation
// points.
llvm_unreachable("Unknown cancel kind!");
}
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
Value *Result = Builder.CreateCall(
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
if (!updateToLocation(Loc))
return Loc.IP;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadID = getOrCreateThreadID(Ident);
if (NumThreads) {
void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
// Build call void __kmpc_flush(ident_t *loc)
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Args[] = {getOrCreateIdent(SrcLocStr)};
Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}
void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
// Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
// Ignore return result until untied tasks are supported.
void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
// Build call __kmpc_omp_taskyield(loc, thread_id, 0);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Constant *I32Null = ConstantInt::getNullValue(Int32);
Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
Module *Module = Func->getParent();
Value *RedArrayPtr =
Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
bool CanGenerateAtomic =
llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
return RI.AtomicReductionGen;
});
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
- CanGenerateAtomic
- ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
- : IdentFlag(0));
+ Value *Ident = getOrCreateIdent(
+ SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
+ : IdentFlag(0));
Value *ThreadId = getOrCreateThreadID(Ident);
Constant *NumVariables = Builder.getInt32(NumReductions);
const DataLayout &DL = Module->getDataLayout();
return Loc.IP;
Directive OMPD = Directive::OMPD_master;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {Ident, ThreadId};
return Loc.IP;
Directive OMPD = Directive::OMPD_masked;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {Ident, ThreadId, Filter};
Value *ArgsEnd[] = {Ident, ThreadId};
Builder.restoreIP(CLI->getPreheaderIP());
Builder.SetCurrentDebugLocation(DL);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
- Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
+ Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
Value *IV = CLI->getIndVar();
// Set up the source location value for OpenMP runtime.
Builder.SetCurrentDebugLocation(DL);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
- Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
+ Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
Value *IV = CLI->getIndVar();
if (!updateToLocation(Loc))
return Loc.IP;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
}
Directive OMPD = Directive::OMPD_single;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {Ident, ThreadId};
return Loc.IP;
Directive OMPD = Directive::OMPD_critical;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *LockVar = getOMPCriticalRegionLock(CriticalName);
Value *Args[] = {Ident, ThreadId, LockVar};
Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
Instruction *ExitCall = nullptr;
if (IsThreads) {
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {Ident, ThreadId};
IRBuilder<>::InsertPointGuard IPG(Builder);
Builder.restoreIP(Loc.IP);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {ThreadId, Size, Allocator};
IRBuilder<>::InsertPointGuard IPG(Builder);
Builder.restoreIP(Loc.IP);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Value *Args[] = {ThreadId, Addr, Allocator};
Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
IRBuilder<>::InsertPointGuard IPG(Builder);
Builder.restoreIP(Loc.IP);
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
Constant *ThreadPrivateCache =
getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
if (!updateToLocation(Loc))
return Loc.IP;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
if (!updateToLocation(Loc))
return;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK1: entry.split:
; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
+; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK1: entry.split:
; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK1: entry.split:
; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK1: omp_region.end:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split:
; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK1: omp_region.end4:
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK1: omp.par.merged.split.split.split.split:
; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1: omp.par.merged.split.split.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK1: omp_region.body5.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK1-NEXT: br label [[OMP_REGION_END4]]
; CHECK1: omp_region.body:
; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged.split:
; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK1: omp_region.body.split:
-; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: br label [[OMP_REGION_END]]
; CHECK1: omp.par.outlined.exit.exitStub:
; CHECK1-NEXT: ret void
; CHECK1-NEXT: entry:
; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK1: omp_parallel:
-; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK1: omp.par.outlined.exit:
; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK1: omp.par.merged:
; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK1: entry.split:
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK2: entry.split:
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
+; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK2: entry.split:
; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK2: entry.split:
; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2: omp_region.end:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split:
; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2: omp_region.end4:
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK2: omp.par.merged.split.split.split.split:
; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2: omp.par.merged.split.split.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK2: omp_region.body5.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT: br label [[OMP_REGION_END4]]
; CHECK2: omp_region.body:
; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged.split:
; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2: omp_region.body.split:
-; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: br label [[OMP_REGION_END]]
; CHECK2: omp.par.outlined.exit.exitStub:
; CHECK2-NEXT: ret void
; CHECK2-NEXT: entry:
; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK2: omp_parallel:
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2: omp.par.outlined.exit:
; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK2: omp.par.merged:
; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK2: entry.split: