; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
-; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s
-; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging < %s | FileCheck %s
+; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s --check-prefix=CHECK1
+; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging < %s | FileCheck %s --check-prefix=CHECK2
; #include <omp.h>
; void foo();
; void use(int);
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP1]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK: omp_region.end4:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split.split:
+; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body5:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK: seq.par.merged2:
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK: omp_region.body5.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: br label [[OMP_REGION_END4]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP1]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK: omp_region.end4:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split.split:
+; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body5:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK: seq.par.merged2:
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK: omp_region.body5.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: br label [[OMP_REGION_END4]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP1]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK: omp_region.end:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split:
+; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK: omp_region.end4:
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split.split:
+; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK: omp_region.body5:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK: seq.par.merged2:
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK: omp.par.merged.split.split.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK: omp_region.body5.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: br label [[OMP_REGION_END4]]
+; CHECK: omp_region.body:
+; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK: seq.par.merged:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK: omp.par.merged.split:
+; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK: omp_region.body.split:
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK: omp.par.outlined.exit:
+; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK: omp.par.exit.split:
+; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: call void (...) @foo()
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: omp.par.entry:
+; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK: omp.par.outlined.exit.exitStub:
+; CHECK-NEXT: ret void
+; CHECK: omp.par.region:
+; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK: omp.par.merged:
+; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK: omp.par.region.split:
+; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK: omp.par.pre_finalize:
+; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT: call void @use(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK: omp_parallel:
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0:#.*]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK: omp.par.pre_finalize:
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: call void (...) @foo()
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT: call void @use(i32 [[CONV]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT: call void @use(i32 [[CONV]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP1]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK: omp.par.pre_finalize:
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_3
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK: omp.par.pre_finalize:
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK: omp_region.end:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split:
-; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK: omp_region.end4:
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split.split.split:
-; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK: omp.par.merged.split.split.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK: omp_region.body5.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT: br label [[OMP_REGION_END4]]
; CHECK: omp_region.body:
; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
; CHECK: omp.par.merged.split:
; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK: omp_region.body.split:
-; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT: br label [[OMP_REGION_END]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: call void (...) @foo()
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: call void (...) @foo()
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
; CHECK: omp_parallel:
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK: omp.par.outlined.exit:
; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: call void (...) @foo()
-; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: omp.par.entry:
; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
; CHECK: omp.par.region:
; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]]
; CHECK: omp.par.merged:
-; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK: omp.par.pre_finalize:
; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
-;
-;
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @use(i32 [[TMP0]])
; CHECK-NEXT: ret void
+; CHECK1-LABEL: define {{[^@]+}}@merge
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (...) @foo()
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK1-NEXT: store float [[ADD]], float* [[P]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK1-NEXT: call void @use(i32 [[CONV]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK1-NEXT: call void @use(i32 [[CONV]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK1-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK1-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK1-NEXT: store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK1-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP1]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1: if.then:
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT: ret void
+; CHECK1: if.end:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1: if.then:
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT: ret void
+; CHECK1: if.end:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK1-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1: if.then:
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT: ret void
+; CHECK1: if.end:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1: if.then:
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT: ret void
+; CHECK1: if.end:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1: omp_region.end:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK1: omp_region.end4:
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split.split.split:
+; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1: omp_region.body5:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK1: seq.par.merged2:
+; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK1-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split.split.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK1: omp_region.body5.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT: br label [[OMP_REGION_END4]]
+; CHECK1: omp_region.body:
+; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1: seq.par.merged:
+; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK1-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1: omp.par.merged.split:
+; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1: omp_region.body.split:
+; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (...) @foo()
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (...) @foo()
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK1: omp_parallel:
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1: omp.par.outlined.exit:
+; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1: omp.par.exit.split:
+; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1: entry.split.split:
+; CHECK1-NEXT: call void (...) @foo()
+; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT: omp.par.entry:
+; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK1: omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT: ret void
+; CHECK1: omp.par.region:
+; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1: omp.par.merged:
+; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK1: entry.split:
+; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1: omp.par.region.split:
+; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1: omp.par.pre_finalize:
+; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT: entry:
+; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT: call void @use(i32 [[TMP0]])
+; CHECK1-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (...) @foo()
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK2-NEXT: store float [[ADD]], float* [[P]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK2-NEXT: call void @use(i32 [[CONV]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK2-NEXT: call void @use(i32 [[CONV]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK2-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK2-NEXT: store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK2-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP1]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2: if.then:
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT: ret void
+; CHECK2: if.end:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2: if.then:
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT: ret void
+; CHECK2: if.end:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2: if.then:
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT: ret void
+; CHECK2: if.end:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2: if.then:
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT: ret void
+; CHECK2: if.end:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2: omp_region.end:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK2: omp_region.end4:
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split.split.split:
+; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2: omp_region.body5:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK2: seq.par.merged2:
+; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK2-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split.split.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK2: omp_region.body5.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT: br label [[OMP_REGION_END4]]
+; CHECK2: omp_region.body:
+; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2: seq.par.merged:
+; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK2-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2: omp.par.merged.split:
+; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2: omp_region.body.split:
+; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (...) @foo()
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (...) @foo()
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]]
+; CHECK2: omp_parallel:
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2: omp.par.outlined.exit:
+; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2: omp.par.exit.split:
+; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2: entry.split.split:
+; CHECK2-NEXT: call void (...) @foo()
+; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT: omp.par.entry:
+; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]]
+; CHECK2: omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT: ret void
+; CHECK2: omp.par.region:
+; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2: omp.par.merged:
+; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]]
+; CHECK2: entry.split:
+; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2: omp.par.region.split:
+; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2: omp.par.pre_finalize:
+; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT: entry:
+; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT: call void @use(i32 [[TMP0]])
+; CHECK2-NEXT: ret void
;