Sequel patch to https://reviews.llvm.org/D111293.
Remove call to CodeGenFunction::InitTempAlloca() from OpenMP related
codegen part.
Also remove the metadata `!llvm.access.group` from the updated lit
tests.
Reviewed By: rjmccall
Differential Revision: https://reviews.llvm.org/D111316
Address ZeroAddrBound =
CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".bound.zero.addr");
- CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
LValue NumLVal = CGF.MakeAddrLValue(
CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
C.getUIntPtrType());
- CGF.InitTempAlloca(NumLVal.getAddress(CGF),
- llvm::ConstantInt::get(CGF.IntPtrTy, 0));
+ CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
+ NumLVal.getAddress(CGF));
llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
CGF.EmitStoreOfScalar(Add, NumLVal);
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK14-NEXT: ret void
//
-//
\ No newline at end of file
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END23:%.*]]
// CHECK3: omp_if.else7:
// CHECK3: omp_if.else17:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK3-NEXT: br label [[OMP_IF_END19]]
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
// CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK3: omp.inner.for.end22:
// CHECK3-NEXT: br label [[OMP_IF_END23]]
// CHECK3: omp_if.end23:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK3: omp.inner.for.end18:
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK3: omp.inner.for.end18:
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END23:%.*]]
// CHECK4: omp_if.else7:
// CHECK4: omp_if.else17:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
// CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK4-NEXT: br label [[OMP_IF_END19]]
// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
// CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK4: omp.inner.for.end22:
// CHECK4-NEXT: br label [[OMP_IF_END23]]
// CHECK4: omp_if.end23:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK4: omp.inner.for.end18:
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK4: omp.inner.for.end18:
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z9gtid_testv()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn4v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn5v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn6v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn1v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn2v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn3v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z9gtid_testv()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn4v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn5v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn6v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn1v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn2v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn3v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK7: omp.inner.for.cond7:
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK7: omp.inner.for.body9:
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK7-NEXT: call void @_Z9gtid_testv()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK7: omp.body.continue12:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK7: omp.inner.for.inc13:
-// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK7: omp.inner.for.end15:
// CHECK7-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn4v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7: omp_if.then:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK7: omp.inner.for.cond22:
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK7: omp.inner.for.body24:
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14
-// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14
+// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn6v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK7: omp.body.continue27:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK7: omp.inner.for.inc28:
-// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK7: omp.inner.for.end30:
// CHECK7-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn1v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK7: omp.inner.for.cond21:
-// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK7: omp.inner.for.body23:
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22
-// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22
+// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn3v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK7: omp.body.continue26:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK7: omp.inner.for.inc27:
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK7: omp.inner.for.end29:
// CHECK7-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK8: omp.inner.for.cond7:
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK8: omp.inner.for.body9:
-// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK8-NEXT: call void @_Z9gtid_testv()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK8: omp.body.continue12:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK8: omp.inner.for.inc13:
-// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK8: omp.inner.for.end15:
// CHECK8-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn4v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8: omp_if.then:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK8: omp.inner.for.cond22:
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK8: omp.inner.for.body24:
-// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14
-// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14
+// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn6v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK8: omp.body.continue27:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK8: omp.inner.for.inc28:
-// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK8: omp.inner.for.end30:
// CHECK8-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn1v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK8: omp.inner.for.cond21:
-// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
-// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK8: omp.inner.for.body23:
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22
-// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22
+// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn3v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK8: omp.body.continue26:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK8: omp.inner.for.inc27:
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK8: omp.inner.for.end29:
// CHECK8-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END23:%.*]]
// CHECK11: omp_if.else7:
// CHECK11: omp_if.else17:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK11-NEXT: br label [[OMP_IF_END19]]
// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
// CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK11: omp.inner.for.end22:
// CHECK11-NEXT: br label [[OMP_IF_END23]]
// CHECK11: omp_if.end23:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK11: omp.inner.for.end18:
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK11: omp.inner.for.end18:
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END23:%.*]]
// CHECK12: omp_if.else7:
// CHECK12: omp_if.else17:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4
// CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]])
// CHECK12-NEXT: br label [[OMP_IF_END19]]
// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
// CHECK12-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK12: omp.inner.for.end22:
// CHECK12-NEXT: br label [[OMP_IF_END23]]
// CHECK12: omp_if.end23:
// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK12: omp.inner.for.end18:
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK12: omp.inner.for.end18:
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z9gtid_testv()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn4v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn5v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn6v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn1v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn2v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn3v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z9gtid_testv()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn4v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn5v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn6v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn1v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn2v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn3v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK15: omp.inner.for.cond7:
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK15: omp.inner.for.body9:
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK15-NEXT: call void @_Z9gtid_testv()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK15: omp.body.continue12:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK15: omp.inner.for.inc13:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK15: omp.inner.for.end15:
// CHECK15-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn4v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15: omp_if.then:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK15: omp.inner.for.cond22:
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK15: omp.inner.for.body24:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18
-// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18
+// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn6v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK15: omp.body.continue27:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK15: omp.inner.for.inc28:
-// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK15: omp.inner.for.end30:
// CHECK15-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn1v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK15: omp.inner.for.cond21:
-// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK15: omp.inner.for.body23:
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26
-// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26
+// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn3v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK15: omp.body.continue26:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK15: omp.inner.for.inc27:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK15: omp.inner.for.end29:
// CHECK15-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK16: omp.inner.for.cond7:
-// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK16: omp.inner.for.body9:
-// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK16-NEXT: call void @_Z9gtid_testv()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK16: omp.body.continue12:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK16: omp.inner.for.inc13:
-// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK16: omp.inner.for.end15:
// CHECK16-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn4v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16: omp_if.then:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK16: omp.inner.for.cond22:
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK16: omp.inner.for.body24:
-// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18
-// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18
+// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn6v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK16: omp.body.continue27:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK16: omp.inner.for.inc28:
-// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK16: omp.inner.for.end30:
// CHECK16-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn1v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK16: omp.inner.for.cond21:
-// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
-// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK16: omp.inner.for.body23:
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26
-// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26
+// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn3v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK16: omp.body.continue26:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK16: omp.inner.for.inc27:
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK16: omp.inner.for.end29:
// CHECK16-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4
// CHECK1-NEXT: store double 3.000000e+00, double* [[B]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR6:[0-9]+]]
+// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]]
// CHECK1-NEXT: ret i32 [[CALL]]
//
//
// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[BAR_A]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
// CHECK1-NEXT: store double [[CONV]], double* addrspacecast (double addrspace(3)* @bar_b to double*), align 8
-// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR6]]
+// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: ret void
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
-// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1:[0-9]+]]
+// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8
-// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR1]]
+// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK6-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4
// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK6-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4
// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]]
// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]]
+// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6]]
+// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @_Z4workv() #[[ATTR6]]
+// CHECK1-NEXT: call void @_Z4workv() #[[ATTR7]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: ret void
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]]
+// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6]]
+// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: call void @_Z4workv() #[[ATTR6]]
+// CHECK2-NEXT: call void @_Z4workv() #[[ATTR7]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: ret void
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]]
+// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6]]
+// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: call void @_Z4workv() #[[ATTR6]]
+// CHECK3-NEXT: call void @_Z4workv() #[[ATTR7]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: ret void
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]]
+// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2)
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
-// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]]
+// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8
-// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]]
+// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
-// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]]
+// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]]
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
-// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]]
// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK1: atomic_exit:
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]]
+// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2)
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
-// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]]
+// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK2-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4
-// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]]
+// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
-// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]]
+// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]]
// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK2: atomic_cont:
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
-// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]]
+// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]]
// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK2: atomic_exit:
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]]
+// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2)
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
-// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]]
+// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4
-// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]]
+// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
-// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]]
+// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]]
// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK3: atomic_cont:
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
-// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]]
+// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]]
// CHECK3-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK3: atomic_exit:
// CHECK3-NEXT: ret void
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]]
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]]
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]]
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]]
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8*
// CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8*
// CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8*
// CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK6-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32*
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4)
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
// CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8)
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
// CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32*
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4)
// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8***
// CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8)
// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
// CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
// CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double*
// CHECK1-NEXT: store double [[TMP1]], double* [[E_ON_STACK]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8)
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8*
// CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float*
// CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4)
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8
// CHECK2-NEXT: store double [[TMP3]], double* [[E1]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8*
// CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float*
// CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4)
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16*
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8
// CHECK3-NEXT: store double [[TMP3]], double* [[E1]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8*
// CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float*
// CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4)
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16*
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK5-NEXT: ret void
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK5-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK5-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK5-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK6-NEXT: ret void
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK6-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK6-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK6-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK6-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK7-NEXT: ret void
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4
// CHECK7: omp_if.else:
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK7-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: br label [[OMP_IF_END]]
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK7-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
// CHECK7: omp_if.else:
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4
// CHECK7-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK7-NEXT: br label [[OMP_IF_END]]
// CHECK7-NEXT: call void @_Z3fn3v()
// CHECK7-NEXT: ret void
//
-//
\ No newline at end of file
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: ret void
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK4-NEXT: ret void
//
-//
\ No newline at end of file
// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK6-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK7: omp_if.else:
// CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK7-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
// CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK7-NEXT: br label [[OMP_IF_END]]
// CHECK8-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK8-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK8-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK8-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK8: omp_if.else:
// CHECK8-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK8-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
// CHECK8-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK8-NEXT: br label [[OMP_IF_END]]
// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK21: omp_if.else:
// CHECK21-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK21-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]]
// CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK21-NEXT: br label [[OMP_IF_END]]
// CHECK22-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK22-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK22-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK22-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK22-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK22-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK22: omp_if.else:
// CHECK22-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK22-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK22-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]]
// CHECK22-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK22-NEXT: br label [[OMP_IF_END]]
// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK23: omp_if.else:
// CHECK23-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK23-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK23-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]]
// CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK23-NEXT: br label [[OMP_IF_END]]
// CHECK24-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK24-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK24-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK24-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK24-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK24-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK24: omp_if.else:
// CHECK24-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK24-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK24-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]]
// CHECK24-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK24-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK9-NEXT: ret void
// CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK10-NEXT: ret void
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK11-NEXT: ret void
// CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK12-NEXT: ret void
// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK17: omp_if.else:
// CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]]
// CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: br label [[OMP_IF_END]]
// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK17: omp_if.else:
// CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: br label [[OMP_IF_END]]
// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK17: omp_if.else:
// CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: br label [[OMP_IF_END]]
// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK17-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]]
// CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK17-NEXT: ret void
// CHECK18-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK18: omp_if.else:
// CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]]
// CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: br label [[OMP_IF_END]]
// CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK18: omp_if.else:
// CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: br label [[OMP_IF_END]]
// CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK18: omp_if.else:
// CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: br label [[OMP_IF_END]]
// CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK18-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]]
// CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK18-NEXT: ret void
// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK19: omp_if.else:
// CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]]
// CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: br label [[OMP_IF_END]]
// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK19: omp_if.else:
// CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: br label [[OMP_IF_END]]
// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK19: omp_if.else:
// CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: br label [[OMP_IF_END]]
// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK19-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]]
// CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK19-NEXT: ret void
// CHECK20-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK20: omp_if.else:
// CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]]
// CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: br label [[OMP_IF_END]]
// CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK20: omp_if.else:
// CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]]
// CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: br label [[OMP_IF_END]]
// CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK20: omp_if.else:
// CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]]
// CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: br label [[OMP_IF_END]]
// CHECK20-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK20-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]]
// CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK20-NEXT: ret void
// CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK25: omp_if.else:
// CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: br label [[OMP_IF_END]]
// CHECK25-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK25: omp_if.else:
// CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: br label [[OMP_IF_END]]
// CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK25: omp_if.else:
// CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: br label [[OMP_IF_END]]
// CHECK25-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK25-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]]
// CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK25-NEXT: ret void
// CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK26: omp_if.else:
// CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: br label [[OMP_IF_END]]
// CHECK26-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK26: omp_if.else:
// CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: br label [[OMP_IF_END]]
// CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK26: omp_if.else:
// CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: br label [[OMP_IF_END]]
// CHECK26-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK26-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]]
// CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK26-NEXT: ret void
// CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK27: omp_if.else:
// CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: br label [[OMP_IF_END]]
// CHECK27-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK27: omp_if.else:
// CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: br label [[OMP_IF_END]]
// CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK27: omp_if.else:
// CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: br label [[OMP_IF_END]]
// CHECK27-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK27-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]]
// CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK27-NEXT: ret void
// CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8*
// CHECK28: omp_if.else:
// CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]]
// CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: br label [[OMP_IF_END]]
// CHECK28-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK28: omp_if.else:
// CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: br label [[OMP_IF_END]]
// CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK28: omp_if.else:
// CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]]
// CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: br label [[OMP_IF_END]]
// CHECK28-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK28-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]]
// CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK28-NEXT: ret void
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK14-NEXT: ret void
//
-//
\ No newline at end of file
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9
+// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18
-// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18
+// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21
-// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z9gtid_testv()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27
-// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn4v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30
-// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30
+// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33
-// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn5v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36
+// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: omp_if.then:
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36
-// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36
+// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39
-// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn6v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45
-// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn1v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48
-// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48
+// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51
-// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn2v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54
+// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: omp_if.then:
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54
-// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
+// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57
-// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn3v()
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8
// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9
-// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9
+// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18
-// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18
-// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18
+// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21
-// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z9gtid_testv()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24
+// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27
-// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !27
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn4v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30
-// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30
-// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30
+// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33
-// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !33
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn5v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36
+// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK2: omp_if.then:
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36
+// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK2-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.else:
-// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36
-// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36
-// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36
+// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39
-// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !39
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn6v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42
+// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45
-// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !45
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn1v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48
-// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48
-// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48
+// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51
-// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !51
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn2v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54
+// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK2: omp_if.then:
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54
+// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK2-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK2: omp_if.else:
-// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54
-// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54
-// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
+// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2: omp_if.end:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57
-// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !57
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK2-NEXT: call void @_Z3fn3v()
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10
// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9
-// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14
-// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19
-// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19
-// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19
+// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z9gtid_testv()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25
+// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28
-// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z3fn4v()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK3: omp_if.then:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34
+// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*
// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34
-// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34
-// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34
+// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: omp_if.then5:
-// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34
+// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]])
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
-// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34
-// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34
-// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
+// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
+// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK3: omp_if.else16:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END18]]
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38
-// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z3fn6v()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42
-// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z3fn6v()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46
+// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49
-// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z3fn1v()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54
+// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3: omp_if.then:
-// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54
+// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
-// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54
-// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54
-// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
+// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57
-// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK3-NEXT: call void @_Z3fn3v()
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10
// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9
-// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9
-// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9
+// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14
-// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19
-// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19
-// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19
+// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z9gtid_testv()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25
+// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28
-// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !28
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z3fn4v()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK4: omp_if.then:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34
+// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*
// CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34
-// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34
-// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34
+// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
+// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK4: omp_if.then5:
-// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34
+// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]])
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
-// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
-// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34
-// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34
-// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
+// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
+// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
+// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK4: omp_if.else16:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END18]]
// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38
-// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !38
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z3fn6v()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42
-// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !42
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z3fn6v()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46
+// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49
-// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !49
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z3fn1v()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54
+// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK4: omp_if.then:
-// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54
+// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
-// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54
-// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54
-// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54
+// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57
-// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !57
+// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK4-NEXT: call void @_Z3fn3v()
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57
+// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: store i32 0, i32* @Arg, align 4
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z9gtid_testv()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn4v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn5v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn6v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn1v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn2v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn3v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
-// CHECK6-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: store i32 0, i32* @Arg, align 4
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z9gtid_testv()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn4v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn5v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn6v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn1v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn2v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn3v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK7: omp.inner.for.cond7:
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK7: omp.inner.for.body9:
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7
-// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7
+// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK7-NEXT: call void @_Z9gtid_testv()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK7: omp.body.continue12:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK7: omp.inner.for.inc13:
-// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK7: omp.inner.for.end15:
// CHECK7-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10
-// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !10
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn4v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7: omp_if.then:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK7: omp.inner.for.cond22:
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK7: omp.inner.for.body24:
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn6v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK7: omp.body.continue27:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK7: omp.inner.for.inc28:
-// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK7: omp.inner.for.end30:
// CHECK7-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19
-// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !19
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn1v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK7: omp.inner.for.cond21:
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23
+// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK7: omp.inner.for.body23:
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23
-// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !23
+// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn3v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK7: omp.body.continue26:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK7: omp.inner.for.inc27:
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK7: omp.inner.for.end29:
// CHECK7-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
-// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK8: omp.inner.for.cond7:
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
-// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7
+// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK8: omp.inner.for.body9:
-// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7
-// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7
+// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK8-NEXT: call void @_Z9gtid_testv()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK8: omp.body.continue12:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK8: omp.inner.for.inc13:
-// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7
+// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK8: omp.inner.for.end15:
// CHECK8-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10
-// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !10
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn4v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8: omp_if.then:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK8: omp.inner.for.cond22:
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK8: omp.inner.for.body24:
-// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn6v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK8: omp.body.continue27:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK8: omp.inner.for.inc28:
-// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK8: omp.inner.for.end30:
// CHECK8-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19
-// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !19
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn1v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK8: omp.inner.for.cond21:
-// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23
+// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK8: omp.inner.for.body23:
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23
-// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !23
+// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn3v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK8: omp.body.continue26:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK8: omp.inner.for.inc27:
-// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23
+// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK8: omp.inner.for.end29:
// CHECK8-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8
// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13
-// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13
-// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13
+// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17
-// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22
-// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22
-// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22
+// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25
-// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z9gtid_testv()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28
+// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31
-// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn4v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34
-// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34
-// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
+// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37
-// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn5v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40
+// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK9: omp_if.then:
-// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40
+// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK9-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK9: omp_if.else:
-// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40
-// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40
-// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40
+// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9: omp_if.end:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43
-// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn6v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46
+// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49
-// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn1v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52
-// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52
-// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52
+// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55
-// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn2v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58
+// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK9: omp_if.then:
-// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58
+// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK9-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK9: omp_if.else:
-// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58
-// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58
-// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
+// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9: omp_if.end:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9: omp.inner.for.cond:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61
-// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61
+// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK9-NEXT: call void @_Z3fn3v()
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8
// CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13
-// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13
-// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13
+// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17
-// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17
+// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22
-// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22
-// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22
+// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25
-// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z9gtid_testv()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28
+// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31
-// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !31
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn4v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34
-// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34
-// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34
+// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37
-// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !37
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn5v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40
+// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK10: omp_if.then:
-// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40
+// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK10-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK10: omp_if.else:
-// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40
-// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40
-// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40
+// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10: omp_if.end:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
-// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43
-// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !43
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn6v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46
+// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49
-// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !49
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn1v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52
-// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52
-// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52
-// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52
+// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55
-// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !55
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn2v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58
+// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK10: omp_if.then:
-// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58
+// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK10-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK10: omp_if.else:
-// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
-// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58
-// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58
-// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
+// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10: omp_if.end:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58
+// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10: omp.inner.for.cond:
-// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
-// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61
+// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10: omp.inner.for.body:
-// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61
-// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !61
+// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK10-NEXT: call void @_Z3fn3v()
// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10: omp.body.continue:
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10: omp.inner.for.inc:
-// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14
// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13
-// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13
-// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13
+// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23
-// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23
-// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23
+// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26
-// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z9gtid_testv()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29
+// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32
-// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z3fn4v()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK11: omp_if.then:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38
+// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*
// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38
-// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38
-// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38
+// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
+// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK11: omp_if.then5:
-// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38
+// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]])
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
-// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38
-// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38
-// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38
-// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38
+// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
+// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38
+// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK11: omp_if.else16:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END18]]
// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42
-// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z3fn6v()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46
-// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z3fn6v()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50
+// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53
-// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z3fn1v()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58
+// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK11: omp_if.then:
-// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58
+// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
-// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58
-// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58
-// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
+// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK11: omp.inner.for.cond:
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61
-// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61
+// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK11-NEXT: call void @_Z3fn3v()
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14
// CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13
-// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13
-// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13
+// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13
+// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23
+// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23
-// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23
-// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23
+// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23
+// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26
-// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z9gtid_testv()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26
+// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29
+// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29
+// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
-// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32
-// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !32
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z3fn4v()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32
+// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK12: omp_if.then:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38
+// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
// CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*
// CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38
-// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38
-// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38
+// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
+// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK12: omp_if.then5:
-// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38
+// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]])
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
-// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38
-// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38
-// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38
-// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38
+// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
+// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
-// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38
+// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK12: omp_if.else16:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END18]]
// CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42
-// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !42
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z3fn6v()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
+// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46
-// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !46
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z3fn6v()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
+// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50
+// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50
+// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
-// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50
+// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53
-// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !53
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z3fn1v()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
+// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58
+// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8
// CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
// CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK12: omp_if.then:
-// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58
+// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
-// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
-// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58
-// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58
-// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58
+// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
+// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
+// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
-// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58
+// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK12: omp.inner.for.cond:
-// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
-// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61
+// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK12: omp.inner.for.body:
-// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61
-// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !61
+// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK12-NEXT: call void @_Z3fn3v()
// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK12: omp.body.continue:
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12: omp.inner.for.inc:
-// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61
+// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: store i32 0, i32* @Arg, align 4
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z9gtid_testv()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn4v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn5v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn6v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn1v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn2v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn3v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
-// CHECK14-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: store i32 0, i32* @Arg, align 4
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z9gtid_testv()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn4v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn5v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn6v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn1v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn2v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn3v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
-// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK15: omp.inner.for.cond7:
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11
+// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK15: omp.inner.for.body9:
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11
-// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11
+// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK15-NEXT: call void @_Z9gtid_testv()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK15: omp.body.continue12:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK15: omp.inner.for.inc13:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK15: omp.inner.for.end15:
// CHECK15-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14
-// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !14
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn4v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15: omp_if.then:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK15: omp.inner.for.cond22:
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK15: omp.inner.for.body24:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn6v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK15: omp.body.continue27:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK15: omp.inner.for.inc28:
-// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK15: omp.inner.for.end30:
// CHECK15-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23
-// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !23
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn1v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK15: omp.inner.for.cond21:
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27
+// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK15: omp.inner.for.body23:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27
-// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !27
+// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn3v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK15: omp.body.continue26:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK15: omp.inner.for.inc27:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK15: omp.inner.for.end29:
// CHECK15-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
-// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK16: omp.inner.for.cond7:
-// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
-// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11
+// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK16: omp.inner.for.body9:
-// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11
-// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11
+// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK16-NEXT: call void @_Z9gtid_testv()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK16: omp.body.continue12:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK16: omp.inner.for.inc13:
-// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11
+// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK16: omp.inner.for.end15:
// CHECK16-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14
-// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !14
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn4v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16: omp_if.then:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK16: omp.inner.for.cond22:
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK16: omp.inner.for.body24:
-// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn6v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK16: omp.body.continue27:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK16: omp.inner.for.inc28:
-// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK16: omp.inner.for.end30:
// CHECK16-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23
-// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !23
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn1v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK16: omp.inner.for.cond21:
-// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27
+// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK16: omp.inner.for.body23:
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27
-// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !27
+// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn3v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK16: omp.body.continue26:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK16: omp.inner.for.inc27:
-// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27
+// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK16: omp.inner.for.end29:
// CHECK16-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK: [[DEPOBJ_SIZE_ADDR1:%.+]] = alloca i64,
// CHECK: = alloca i64,
// CHECK: [[DEP_COUNTER_ADDR:%.+]] = alloca i64,
- // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]],
- // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: [[ALLOC:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 65, i64 48, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, [[PRIVATES_TY:%.+]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*))
// CHECK: [[EVT_VAL:%.+]] = call i8* @__kmpc_task_allow_completion_event(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[ALLOC]])
// CHECK: [[D_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP]], i{{.+}} -1
// CHECK: [[D_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP_BASE]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SIZE1:%.+]] = load i64, i64* [[D_DEP_BASE_SIZE]],
+ // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]],
// CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR]],
// CHECK: [[SIZE:%.+]] = add nuw i64 [[SZ]], [[SIZE1]]
// CHECK: store i64 [[SIZE]], i64* [[DEPOBJ_SIZE_ADDR]],
// CHECK: [[X_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP]], i{{.+}} -1
// CHECK: [[X_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP_BASE]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SIZE2:%.+]] = load i64, i64* [[X_DEP_BASE_SIZE]],
+ // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]],
// CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR1]],
// CHECK: [[SIZE3:%.+]] = add nuw i64 [[SZ]], [[SIZE2]]
// CHECK: store i64 [[SIZE3]], i64* [[DEPOBJ_SIZE_ADDR1]],
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK5: omp_if.else:
// CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK5-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK6: omp_if.else:
// CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK6-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK13: omp_if.else:
// CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK13-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK14: omp_if.else:
// CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK14-NEXT: br label [[OMP_IF_END]]
// CHECK14-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK14-NEXT: ret void
//
-//
\ No newline at end of file
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2: omp_if.else:
// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK2-NEXT: br label [[OMP_IF_END]]
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK3: omp_if.else6:
// CHECK3: omp_if.else16:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END18]]
// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK3: omp.inner.for.end21:
// CHECK3-NEXT: br label [[OMP_IF_END22]]
// CHECK3: omp_if.end22:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK3: omp.inner.for.end18:
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK3: omp_if.else:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK3: omp.inner.for.end18:
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3: omp_if.end:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK3: omp_if.else:
// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK3-NEXT: br label [[OMP_IF_END]]
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK4: omp_if.else6:
// CHECK4: omp_if.else16:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END18]]
// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
// CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK4: omp.inner.for.end21:
// CHECK4-NEXT: br label [[OMP_IF_END22]]
// CHECK4: omp_if.end22:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK4: omp.inner.for.end18:
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK4: omp_if.else:
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK4: omp.inner.for.end18:
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4: omp_if.end:
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK4: omp_if.else:
// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK4-NEXT: br label [[OMP_IF_END]]
// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z9gtid_testv()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn4v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn5v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn6v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK5-NEXT: call void @_Z3fn1v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: store i32 100, i32* [[I]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK5: omp.inner.for.cond7:
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK5: omp.inner.for.body9:
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK5-NEXT: call void @_Z3fn2v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK5: omp.body.continue12:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK5: omp.inner.for.inc13:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK5: omp.inner.for.end15:
// CHECK5-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK5: omp.inner.for.cond21:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK5: omp.inner.for.body23:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK5-NEXT: call void @_Z3fn3v()
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK5: omp.body.continue26:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK5: omp.inner.for.inc27:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK5: omp.inner.for.end29:
// CHECK5-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z9gtid_testv()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn4v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12
-// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn5v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15
-// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn6v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK6: omp.inner.for.cond:
-// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK6: omp.inner.for.body:
-// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK6-NEXT: call void @_Z3fn1v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK6: omp.body.continue:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK6: omp.inner.for.inc:
-// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK6: omp.inner.for.end:
// CHECK6-NEXT: store i32 100, i32* [[I]], align 4
// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK6: omp.inner.for.cond7:
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK6: omp.inner.for.body9:
-// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21
-// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK6-NEXT: call void @_Z3fn2v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK6: omp.body.continue12:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK6: omp.inner.for.inc13:
-// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21
+// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK6: omp.inner.for.end15:
// CHECK6-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK6: omp.inner.for.cond21:
-// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK6: omp.inner.for.body23:
-// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24
-// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK6-NEXT: call void @_Z3fn3v()
// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK6: omp.body.continue26:
// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK6: omp.inner.for.inc27:
-// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24
+// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK6: omp.inner.for.end29:
// CHECK6-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK7: omp.inner.for.cond7:
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK7: omp.inner.for.body9:
-// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK7-NEXT: call void @_Z9gtid_testv()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK7: omp.body.continue12:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK7: omp.inner.for.inc13:
-// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK7: omp.inner.for.end15:
// CHECK7-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn4v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7: omp_if.then:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK7: omp.inner.for.cond22:
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK7: omp.inner.for.body24:
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14
-// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14
+// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn6v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK7: omp.body.continue27:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK7: omp.inner.for.inc28:
-// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK7: omp.inner.for.end30:
// CHECK7-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7: omp.inner.for.cond:
-// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7: omp.inner.for.body:
-// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK7-NEXT: call void @_Z3fn1v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7: omp.body.continue:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7: omp.inner.for.inc:
-// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK7: omp.inner.for.end:
// CHECK7-NEXT: store i32 100, i32* [[I]], align 4
// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK7: omp.inner.for.cond21:
-// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
-// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK7: omp.inner.for.body23:
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22
-// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22
+// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK7-NEXT: call void @_Z3fn3v()
// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK7: omp.body.continue26:
// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK7: omp.inner.for.inc27:
-// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK7: omp.inner.for.end29:
// CHECK7-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK8: omp.inner.for.cond7:
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
-// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK8: omp.inner.for.body9:
-// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6
-// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6
+// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK8-NEXT: call void @_Z9gtid_testv()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK8: omp.body.continue12:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK8: omp.inner.for.inc13:
-// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6
+// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK8: omp.inner.for.end15:
// CHECK8-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9
-// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn4v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8: omp_if.then:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK8: omp.inner.for.cond22:
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK8: omp.inner.for.body24:
-// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14
-// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14
+// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn6v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK8: omp.body.continue27:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK8: omp.inner.for.inc28:
-// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14
+// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK8: omp.inner.for.end30:
// CHECK8-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8: omp.inner.for.cond:
-// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
-// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8: omp.inner.for.body:
-// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18
-// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18
+// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK8-NEXT: call void @_Z3fn1v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8: omp.body.continue:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8: omp.inner.for.inc:
-// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18
+// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK8: omp.inner.for.end:
// CHECK8-NEXT: store i32 100, i32* [[I]], align 4
// CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK8: omp.inner.for.cond21:
-// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
-// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK8: omp.inner.for.body23:
-// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22
-// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22
+// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK8-NEXT: call void @_Z3fn3v()
// CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK8: omp.body.continue26:
// CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK8: omp.inner.for.inc27:
-// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22
+// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK8: omp.inner.for.end29:
// CHECK8-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9: omp_if.else:
// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK9-NEXT: br label [[OMP_IF_END]]
// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK10: omp_if.else:
// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK10-NEXT: br label [[OMP_IF_END]]
// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]]
// CHECK10: omp.inner.for.end:
// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10: omp.loop.exit:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK11: omp_if.else6:
// CHECK11: omp_if.else16:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END18]]
// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
// CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK11: omp.inner.for.end21:
// CHECK11-NEXT: br label [[OMP_IF_END22]]
// CHECK11: omp_if.end22:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK11: omp.inner.for.end18:
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK11: omp_if.else:
// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK11: omp.inner.for.end18:
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11: omp_if.end:
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK11: omp_if.else:
// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK11-NEXT: br label [[OMP_IF_END]]
// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END22:%.*]]
// CHECK12: omp_if.else6:
// CHECK12: omp_if.else16:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4
// CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END18]]
// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
// CHECK12-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]]
// CHECK12: omp.inner.for.end21:
// CHECK12-NEXT: br label [[OMP_IF_END22]]
// CHECK12: omp_if.end22:
// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]]
// CHECK12: omp.inner.for.end18:
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK12: omp_if.else:
// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]]
// CHECK12: omp.inner.for.end18:
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12: omp_if.end:
// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK12: omp_if.else:
// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
// CHECK12-NEXT: br label [[OMP_IF_END]]
// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
-// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]]
+// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]]
// CHECK12: omp.inner.for.end:
// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK12: omp.loop.exit:
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z9gtid_testv()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn4v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn5v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn6v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK13-NEXT: call void @_Z3fn1v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: store i32 100, i32* [[I]], align 4
// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK13: omp.inner.for.cond7:
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK13: omp.inner.for.body9:
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK13-NEXT: call void @_Z3fn2v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK13: omp.body.continue12:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK13: omp.inner.for.inc13:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK13: omp.inner.for.end15:
// CHECK13-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK13: omp.inner.for.cond21:
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK13: omp.inner.for.body23:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK13-NEXT: call void @_Z3fn3v()
// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK13: omp.body.continue26:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK13: omp.inner.for.inc27:
-// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK13: omp.inner.for.end29:
// CHECK13-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z9gtid_testv()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn4v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16
-// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn5v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19
-// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn6v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK14: omp.inner.for.cond:
-// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK14: omp.inner.for.body:
-// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK14-NEXT: call void @_Z3fn1v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK14: omp.body.continue:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK14: omp.inner.for.inc:
-// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK14: omp.inner.for.end:
// CHECK14-NEXT: store i32 100, i32* [[I]], align 4
// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK14: omp.inner.for.cond7:
-// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK14: omp.inner.for.body9:
-// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25
-// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK14-NEXT: call void @_Z3fn2v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK14: omp.body.continue12:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK14: omp.inner.for.inc13:
-// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25
+// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK14: omp.inner.for.end15:
// CHECK14-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK14: omp.inner.for.cond21:
-// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK14: omp.inner.for.body23:
-// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28
-// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK14-NEXT: call void @_Z3fn3v()
// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK14: omp.body.continue26:
// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK14: omp.inner.for.inc27:
-// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28
+// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]]
// CHECK14: omp.inner.for.end29:
// CHECK14-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK15: omp.inner.for.cond7:
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK15: omp.inner.for.body9:
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK15-NEXT: call void @_Z9gtid_testv()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK15: omp.body.continue12:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK15: omp.inner.for.inc13:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK15: omp.inner.for.end15:
// CHECK15-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn4v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15: omp_if.then:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK15: omp.inner.for.cond22:
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK15: omp.inner.for.body24:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18
-// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18
+// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn6v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK15: omp.body.continue27:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK15: omp.inner.for.inc28:
-// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK15: omp.inner.for.end30:
// CHECK15-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK15: omp.inner.for.cond:
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK15-NEXT: call void @_Z3fn1v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: store i32 100, i32* [[I]], align 4
// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK15: omp.inner.for.cond21:
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK15: omp.inner.for.body23:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26
-// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26
+// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK15-NEXT: call void @_Z3fn3v()
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK15: omp.body.continue26:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK15: omp.inner.for.inc27:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK15: omp.inner.for.end29:
// CHECK15-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]]
// CHECK16: omp.inner.for.cond7:
-// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
-// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
+// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4
// CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]]
// CHECK16: omp.inner.for.body9:
-// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10
-// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10
+// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4
+// CHECK16-NEXT: call void @_Z9gtid_testv()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]]
// CHECK16: omp.body.continue12:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]]
// CHECK16: omp.inner.for.inc13:
-// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10
+// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK16: omp.inner.for.end15:
// CHECK16-NEXT: store i32 100, i32* [[I6]], align 4
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13
-// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn4v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16: omp_if.then:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]]
// CHECK16: omp.inner.for.cond22:
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
// CHECK16: omp.inner.for.body24:
-// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18
-// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18
+// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn6v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]]
// CHECK16: omp.body.continue27:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]]
// CHECK16: omp.inner.for.inc28:
-// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18
+// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK16: omp.inner.for.end30:
// CHECK16-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK16: omp.inner.for.cond:
-// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
-// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK16: omp.inner.for.body:
-// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22
-// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22
+// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4
+// CHECK16-NEXT: call void @_Z3fn1v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK16: omp.body.continue:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK16: omp.inner.for.inc:
-// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
+// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK16: omp.inner.for.end:
// CHECK16-NEXT: store i32 100, i32* [[I]], align 4
// CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]]
// CHECK16: omp.inner.for.cond21:
-// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
-// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
+// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4
// CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
// CHECK16: omp.inner.for.body23:
-// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
-// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26
-// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26
+// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4
+// CHECK16-NEXT: call void @_Z3fn3v()
// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]]
// CHECK16: omp.body.continue26:
// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]]
// CHECK16: omp.inner.for.inc27:
-// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26
+// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4
// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]]
// CHECK16: omp.inner.for.end29:
// CHECK16-NEXT: store i32 100, i32* [[I20]], align 4
// CHECK1-NEXT: [[C:%.*]] = alloca i32***, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4
// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[B]], i64 [[TMP4]], i32**** [[C]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
// CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK1-NEXT: store i32* [[TMP3]], i32** [[P]], align 8
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[P]], i32** [[A_ADDR]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
// CHECK1: if.end:
// CHECK1-NEXT: ret void
//
-//
\ No newline at end of file