// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]]
+// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]]
+// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]]
+// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
-// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
-// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1: atomic_exit:
+// CHECK1-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]]
+// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]]
+// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]]
+// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
-// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK2: atomic_cont:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
-// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2: atomic_exit:
+// CHECK2-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// CHECK2-NEXT: ret void
//
//