// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK3-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK3-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
// CHECK3-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
// CHECK3-NEXT: br label [[OMP_CRITICAL_SYNC]]
// CHECK3: omp.critical.sync:
-// CHECK3-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]])
+// CHECK3-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK3-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK3-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
// CHECK3-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK4-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK4-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
// CHECK4-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
// CHECK4-NEXT: br label [[OMP_CRITICAL_SYNC]]
// CHECK4: omp.critical.sync:
-// CHECK4-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]])
+// CHECK4-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK4-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK4-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
// CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK5-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK5-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
// CHECK5-NEXT: br label [[OMP_CRITICAL_SYNC]]
// CHECK5: omp.critical.sync:
-// CHECK5-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]])
+// CHECK5-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK5-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK5-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
// CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
// CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]]
// CHECK1: omp.critical.sync:
-// CHECK1-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]])
+// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK1-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
// CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]]
// CHECK2: omp.critical.sync:
-// CHECK2-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]])
+// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK2-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]]
declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
-declare i32 @__kmpc_warp_active_thread_mask()
+declare i64 @__kmpc_warp_active_thread_mask()
-declare void @__kmpc_syncwarp(i32)
+declare void @__kmpc_syncwarp(i64)
declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
+; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_syncwarp(i32)
+; CHECK-NEXT: declare void @__kmpc_syncwarp(i64)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)
; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
+; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)